Changeset 9ee2ce3 for thirdparty/SZ/sz/src/sz_float.c
- Timestamp:
- 09/28/18 16:32:55 (6 years ago)
- Branches:
- master, pympi
- Children:
- e6aa0eb
- Parents:
- abca157
- git-author:
- Hal Finkel <hfinkel@…> (09/28/18 16:32:55)
- git-committer:
- Hal Finkel <hfinkel@…> (09/28/18 16:32:55)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
thirdparty/SZ/sz/src/sz_float.c
r2c47b73 r9ee2ce3 1 1 /** 2 2 * @file sz_float.c 3 * @author Sheng Di and Dingwen Tao3 * @author Sheng Di, Dingwen Tao, Xin Liang 4 4 * @date Aug, 2016 5 5 * @brief SZ_Init, Compression and Decompression functions … … 26 26 #include "rw.h" 27 27 #include "sz_float_ts.h" 28 #include "utility.h" 28 29 29 30 unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize) … … 406 407 pred = last3CmprsData[0]; 407 408 predAbsErr = fabs(curData - pred); 408 if(predAbsErr< =checkRadius)409 if(predAbsErr<checkRadius) 409 410 { 410 411 state = (predAbsErr/realPrecision+1)/2; … … 1357 1358 } 1358 1359 else 1359 { 1360 tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); 1360 { 1361 if(sz_with_regression == SZ_NO_REGRESSION) 1362 tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); 1363 else 1364 *newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize); 1361 1365 compressionType = 0; //snapshot-based compression 1362 1366 multisteps->lastSnapshotStep = timestep; … … 1367 1371 tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); 1368 1372 1369 1370 convertTDPStoFlatBytes_float(tdps, newByteData, outSize);1371 1372 if(*outSize>dataLength*sizeof(float))1373 SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);1374 1375 free_TightDataPointStorageF(tdps);1376 1373 if(tdps!=NULL) 1374 { 1375 convertTDPStoFlatBytes_float(tdps, newByteData, outSize); 1376 if(*outSize>dataLength*sizeof(float)) 1377 SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); 1378 free_TightDataPointStorageF(tdps); 1379 } 1380 1377 1381 return compressionType; 1378 1382 } … … 1771 1775 if(errBoundMode>=PW_REL) 1772 1776 { 1773 //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, realPrecision, r1, outSize, min, max);1774 SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);1777 SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r1, outSize, min, max); 1778 //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); 1775 1779 } 1776 1780 else … … 1780 1784 { 1781 1785 if(errBoundMode>=PW_REL) 1782 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr (newByteData, oriData, realPrecision, r2, r1, outSize, min, max);1786 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r2, r1, outSize, min, max); 1783 1787 else 1784 1788 SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); … … 1787 1791 { 1788 1792 if(errBoundMode>=PW_REL) 1789 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr (newByteData, oriData, realPrecision, r3, r2, r1, outSize, min, max);1793 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r3, r2, r1, outSize, min, max); 1790 1794 else 1791 1795 SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); … … 1794 1798 { 1795 1799 if(errBoundMode>=PW_REL) 1796 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr (newByteData, oriData, realPrecision, r4*r3, r2, r1, outSize, min, max);1800 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r4*r3, r2, r1, outSize, min, max); 1797 1801 else 1798 1802 SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); … … 1855 1859 if(confparams_cpr->errorBoundMode>=PW_REL) 1856 1860 { 1857 //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(&tmpByteData, oriData, realPrecision, r1, &tmpOutSize, min, max); 1858 SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, 1859 valueRangeSize, medianValue, &tmpOutSize); 1861 SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max); 1862 //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize); 1860 1863 } 1861 1864 else … … 1871 1874 { 1872 1875 if(confparams_cpr->errorBoundMode>=PW_REL) 1873 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr (&tmpByteData, oriData, realPrecision, r2, r1, &tmpOutSize, min, max);1876 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max); 1874 1877 else 1875 1878 #ifdef HAVE_TIMECMPR … … 1878 1881 else 1879 1882 #endif 1880 SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1883 { 1884 if(sz_with_regression == SZ_NO_REGRESSION) 1885 SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1886 else 1887 tmpByteData = SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize); 1888 } 1881 1889 } 1882 1890 else … … 1884 1892 { 1885 1893 if(confparams_cpr->errorBoundMode>=PW_REL) 1886 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr (&tmpByteData, oriData, realPrecision, r3, r2, r1, &tmpOutSize, min, max);1894 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max); 1887 1895 else 1888 1896 #ifdef HAVE_TIMECMPR 1889 1897 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 1890 multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);1898 multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1891 1899 else 1892 1900 #endif 1893 SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1901 { 1902 if(sz_with_regression == SZ_NO_REGRESSION) 1903 SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1904 else 1905 tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); 1906 } 1894 1907 } 1895 1908 else … … 1897 1910 { 1898 1911 if(confparams_cpr->errorBoundMode>=PW_REL) 1899 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr (&tmpByteData, oriData, realPrecision, r4*r3, r2, r1, &tmpOutSize, min, max);1912 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max); 1900 1913 //ToDO 1901 1914 //SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr(&tmpByteData, oriData, r4, r3, r2, r1, &tmpOutSize, min, max); … … 1906 1919 else 1907 1920 #endif 1908 SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1921 { 1922 if(sz_with_regression == SZ_NO_REGRESSION) 1923 SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1924 else 1925 tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize); 1926 } 1909 1927 } 1910 1928 else … … 1921 1939 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION || confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION) 1922 1940 { 1923 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1941 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1924 1942 free(tmpByteData); 1925 1943 } … … 3375 3393 size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); 3376 3394 memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); 3377 size_t totalSampleSize = 0; //(r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance;3395 size_t totalSampleSize = 0; 3378 3396 3379 3397 size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset … … 3390 3408 { 3391 3409 radiusIndex = confparams_cpr->maxRangeRadius - 1; 3392 //printf("radiusIndex=%d\n", radiusIndex);3393 3410 } 3394 3411 intervals[radiusIndex]++; 3395 // printf("TEST: %ld, i: %ld\tj: %ld\tk: %ld\n", data_pos - oriData);3396 // fflush(stdout);3397 3412 offset_count += confparams_cpr->sampleDistance; 3398 3413 if(offset_count >= r3){ … … 3410 3425 else data_pos += confparams_cpr->sampleDistance; 3411 3426 } 3412 // printf("sample_count: %ld\n", sample_count);3413 // fflush(stdout);3414 // if(*max_freq < 0.15) *max_freq *= 2;3415 3427 //compute the appropriate number 3416 3428 size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; … … 3430 3442 powerOf2 = 32; 3431 3443 free(intervals); 3432 //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);3433 3444 return powerOf2; 3434 3445 } … … 3750 3761 size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); 3751 3762 memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); 3752 size_t totalSampleSize = 0;//(r1-1)*(r2-1)/confparams_cpr->sampleDistance; 3753 3754 //float max = oriData[0]; 3755 //float min = oriData[0]; 3763 size_t totalSampleSize = 0; 3756 3764 3757 3765 size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset … … 3812 3820 while(data_pos - oriData < dataLength){ 3813 3821 totalSampleSize++; 3814 //pred_value = 2*data_pos[-1] - data_pos[-2];3815 3822 pred_value = data_pos[-1]; 3816 3823 pred_err = fabs(pred_value - *data_pos); … … 3841 3848 3842 3849 free(intervals); 3843 //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);3844 3850 return powerOf2; 3845 3851 } … … 4037 4043 } 4038 4044 4045 /*The above code is for sz 1.4.13; the following code is for sz 2.0*/ 4046 4047 unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq) 4048 { 4049 float mean = 0.0; 4050 size_t len = r1 * r2; 4051 size_t mean_distance = (int) (sqrt(len)); 4052 4053 float * data_pos = oriData; 4054 size_t mean_count = 0; 4055 while(data_pos - oriData < len){ 4056 mean += *data_pos; 4057 mean_count ++; 4058 data_pos += mean_distance; 4059 } 4060 if(mean_count > 0) mean /= mean_count; 4061 size_t range = 8192; 4062 size_t radius = 4096; 4063 size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); 4064 memset(freq_intervals, 0, range*sizeof(size_t)); 4065 4066 unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; 4067 int sampleDistance = confparams_cpr->sampleDistance; 4068 float predThreshold = confparams_cpr->predThreshold; 4069 4070 size_t i; 4071 size_t radiusIndex; 4072 float pred_value = 0, pred_err; 4073 size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); 4074 memset(intervals, 0, maxRangeRadius*sizeof(size_t)); 4075 4076 float mean_diff; 4077 ptrdiff_t freq_index; 4078 size_t freq_count = 0; 4079 size_t n1_count = 1; 4080 size_t offset_count = sampleDistance - 1; 4081 size_t offset_count_2 = 0; 4082 size_t sample_count = 0; 4083 data_pos = oriData + r2 + offset_count; 4084 while(data_pos - oriData < len){ 4085 pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; 4086 pred_err = fabs(pred_value - *data_pos); 4087 if(pred_err < realPrecision) freq_count ++; 4088 radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); 4089 if(radiusIndex>=maxRangeRadius) 4090 radiusIndex = maxRangeRadius - 1; 4091 intervals[radiusIndex]++; 4092 4093 mean_diff = *data_pos - mean; 4094 if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; 4095 else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; 4096 if(freq_index <= 0){ 4097 freq_intervals[0] ++; 4098 } 4099 else if(freq_index >= range){ 4100 freq_intervals[range - 1] ++; 4101 } 4102 else{ 4103 freq_intervals[freq_index] ++; 4104 } 4105 offset_count += sampleDistance; 4106 if(offset_count >= r2){ 4107 n1_count ++; 4108 offset_count_2 = n1_count % sampleDistance; 4109 data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); 4110 offset_count = (sampleDistance - offset_count_2); 4111 if(offset_count == 0) offset_count ++; 4112 } 4113 else data_pos += sampleDistance; 4114 sample_count ++; 4115 } 4116 *max_freq = freq_count * 1.0/ sample_count; 4117 4118 //compute the appropriate number 4119 size_t targetCount = sample_count*predThreshold; 4120 size_t sum = 0; 4121 for(i=0;i<maxRangeRadius;i++) 4122 { 4123 sum += intervals[i]; 4124 if(sum>targetCount) 4125 break; 4126 } 4127 if(i>=maxRangeRadius) 4128 i = maxRangeRadius-1; 4129 unsigned int accIntervals = 2*(i+1); 4130 unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); 4131 4132 if(powerOf2<32) 4133 powerOf2 = 32; 4134 4135 // collect frequency 4136 size_t max_sum = 0; 4137 size_t max_index = 0; 4138 size_t tmp_sum; 4139 size_t * freq_pos = freq_intervals + 1; 4140 for(size_t i=1; i<range-2; i++){ 4141 tmp_sum = freq_pos[0] + freq_pos[1]; 4142 if(tmp_sum > max_sum){ 4143 max_sum = tmp_sum; 4144 max_index = i; 4145 } 4146 freq_pos ++; 4147 } 4148 *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); 4149 *mean_freq = max_sum * 1.0 / sample_count; 4150 4151 free(freq_intervals); 4152 free(intervals); 4153 return powerOf2; 4154 } 4155 4156 // 2D: modified for higher performance 4157 #define MIN(a, b) a<b? a : b 4158 unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ 4159 4160 unsigned int quantization_intervals; 4161 float sz_sample_correct_freq = -1;//0.5; //-1 4162 float dense_pos; 4163 float mean_flush_freq; 4164 unsigned char use_mean = 0; 4165 4166 if(exe_params->optQuantMode==1) 4167 { 4168 quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 4169 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 4170 updateQuantizationInfo(quantization_intervals); 4171 } 4172 else{ 4173 quantization_intervals = exe_params->intvCapacity; 4174 } 4175 4176 // calculate block dims 4177 size_t num_x, num_y; 4178 size_t block_size = 16; 4179 4180 SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 4181 SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 4182 4183 size_t split_index_x, split_index_y; 4184 size_t early_blockcount_x, early_blockcount_y; 4185 size_t late_blockcount_x, late_blockcount_y; 4186 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 4187 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 4188 4189 size_t max_num_block_elements = early_blockcount_x * early_blockcount_y; 4190 size_t num_blocks = num_x * num_y; 4191 size_t num_elements = r1 * r2; 4192 4193 size_t dim0_offset = r2; 4194 4195 int * result_type = (int *) malloc(num_elements * sizeof(int)); 4196 size_t unpred_data_max_size = max_num_block_elements; 4197 float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); 4198 size_t total_unpred = 0; 4199 size_t unpredictable_count; 4200 float * data_pos = oriData; 4201 int * type = result_type; 4202 size_t offset_x, offset_y; 4203 size_t current_blockcount_x, current_blockcount_y; 4204 4205 float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); 4206 float * reg_params_pos = reg_params; 4207 // move regression part out 4208 size_t params_offset_b = num_blocks; 4209 size_t params_offset_c = 2*num_blocks; 4210 for(size_t i=0; i<num_x; i++){ 4211 for(size_t j=0; j<num_y; j++){ 4212 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4213 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4214 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4215 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4216 4217 data_pos = oriData + offset_x * dim0_offset + offset_y; 4218 4219 { 4220 float * cur_data_pos = data_pos; 4221 float fx = 0.0; 4222 float fy = 0.0; 4223 float f = 0; 4224 double sum_x; 4225 float curData; 4226 for(size_t i=0; i<current_blockcount_x; i++){ 4227 sum_x = 0; 4228 for(size_t j=0; j<current_blockcount_y; j++){ 4229 curData = *cur_data_pos; 4230 sum_x += curData; 4231 fy += curData * j; 4232 cur_data_pos ++; 4233 } 4234 fx += sum_x * i; 4235 f += sum_x; 4236 cur_data_pos += dim0_offset - current_blockcount_y; 4237 } 4238 float coeff = 1.0 / (current_blockcount_x * current_blockcount_y); 4239 reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); 4240 reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); 4241 reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2); 4242 } 4243 4244 reg_params_pos ++; 4245 } 4246 } 4247 4248 //Compress coefficient arrays 4249 double precision_a, precision_b, precision_c; 4250 float rel_param_err = 0.15/3; 4251 precision_a = rel_param_err * realPrecision / late_blockcount_x; 4252 precision_b = rel_param_err * realPrecision / late_blockcount_y; 4253 precision_c = rel_param_err * realPrecision; 4254 4255 float mean = 0; 4256 use_mean = 0; 4257 if(use_mean){ 4258 // compute mean 4259 double sum = 0.0; 4260 size_t mean_count = 0; 4261 for(size_t i=0; i<num_elements; i++){ 4262 if(fabs(oriData[i] - dense_pos) < realPrecision){ 4263 sum += oriData[i]; 4264 mean_count ++; 4265 } 4266 } 4267 if(mean_count > 0) mean = sum / mean_count; 4268 } 4269 4270 4271 double tmp_realPrecision = realPrecision; 4272 4273 // use two prediction buffers for higher performance 4274 float * unpredictable_data = result_unpredictable_data; 4275 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 4276 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 4277 size_t reg_count = 0; 4278 size_t strip_dim_0 = early_blockcount_x + 1; 4279 size_t strip_dim_1 = r2 + 1; 4280 size_t strip_dim0_offset = strip_dim_1; 4281 unsigned char * indicator_pos = indicator; 4282 size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float); 4283 float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size); 4284 memset(prediction_buffer_1, 0, prediction_buffer_size); 4285 float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size); 4286 memset(prediction_buffer_2, 0, prediction_buffer_size); 4287 float * cur_pb_buf = prediction_buffer_1; 4288 float * next_pb_buf = prediction_buffer_2; 4289 float * cur_pb_buf_pos; 4290 float * next_pb_buf_pos; 4291 int intvCapacity = exe_params->intvCapacity; 4292 int intvRadius = exe_params->intvRadius; 4293 int use_reg = 0; 4294 4295 reg_params_pos = reg_params; 4296 // compress the regression coefficients on the fly 4297 float last_coeffcients[3] = {0.0}; 4298 int coeff_intvCapacity_sz = 65536; 4299 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 4300 int * coeff_type[3]; 4301 int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); 4302 float * coeff_unpred_data[3]; 4303 float * coeff_unpredictable_data = (float *) malloc(num_blocks*3*sizeof(float)); 4304 double precision[3]; 4305 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c; 4306 for(int i=0; i<3; i++){ 4307 coeff_type[i] = coeff_result_type + i * num_blocks; 4308 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 4309 } 4310 int coeff_index = 0; 4311 unsigned int coeff_unpredictable_count[3] = {0}; 4312 if(use_mean){ 4313 type = result_type; 4314 int intvCapacity_sz = intvCapacity - 2; 4315 for(size_t i=0; i<num_x; i++){ 4316 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4317 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4318 data_pos = oriData + offset_x * dim0_offset; 4319 4320 cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; 4321 next_pb_buf_pos = next_pb_buf + 1; 4322 float * pb_pos = cur_pb_buf_pos; 4323 float * next_pb_pos = next_pb_buf_pos; 4324 4325 for(size_t j=0; j<num_y; j++){ 4326 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4327 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4328 4329 /*sampling: decide which predictor to use (regression or lorenzo)*/ 4330 { 4331 float * cur_data_pos; 4332 float curData; 4333 float pred_reg, pred_sz; 4334 float err_sz = 0.0, err_reg = 0.0; 4335 // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] 4336 // [1, 9] [3, 7] [7, 3] [9, 1] 4337 int count = 0; 4338 for(int i=1; i<current_blockcount_x; i+=2){ 4339 cur_data_pos = data_pos + i * dim0_offset + i; 4340 curData = *cur_data_pos; 4341 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4342 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; 4343 4344 err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); 4345 4346 err_reg += fabs(pred_reg - curData); 4347 4348 cur_data_pos = data_pos + i * dim0_offset + (block_size - i); 4349 curData = *cur_data_pos; 4350 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4351 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; 4352 err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); 4353 4354 err_reg += fabs(pred_reg - curData); 4355 4356 count += 2; 4357 } 4358 4359 use_reg = (err_reg < err_sz); 4360 } 4361 if(use_reg) 4362 { 4363 { 4364 /*predict coefficients in current block via previous reg_block*/ 4365 float cur_coeff; 4366 double diff, itvNum; 4367 for(int e=0; e<3; e++){ 4368 cur_coeff = reg_params_pos[e*num_blocks]; 4369 diff = cur_coeff - last_coeffcients[e]; 4370 itvNum = fabs(diff)/precision[e] + 1; 4371 if (itvNum < coeff_intvCapacity_sz){ 4372 if (diff < 0) itvNum = -itvNum; 4373 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 4374 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 4375 //ganrantee comporession error against the case of machine-epsilon 4376 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 4377 coeff_type[e][coeff_index] = 0; 4378 last_coeffcients[e] = cur_coeff; 4379 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4380 } 4381 } 4382 else{ 4383 coeff_type[e][coeff_index] = 0; 4384 last_coeffcients[e] = cur_coeff; 4385 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4386 } 4387 } 4388 coeff_index ++; 4389 } 4390 float curData; 4391 float pred; 4392 double itvNum; 4393 double diff; 4394 size_t index = 0; 4395 size_t block_unpredictable_count = 0; 4396 float * cur_data_pos = data_pos; 4397 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4398 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4399 curData = *cur_data_pos; 4400 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4401 diff = curData - pred; 4402 itvNum = fabs(diff)/realPrecision + 1; 4403 if (itvNum < intvCapacity){ 4404 if (diff < 0) itvNum = -itvNum; 4405 type[index] = (int) (itvNum/2) + intvRadius; 4406 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4407 //ganrantee comporession error against the case of machine-epsilon 4408 if(fabs(curData - pred)>realPrecision){ 4409 type[index] = 0; 4410 pred = curData; 4411 unpredictable_data[block_unpredictable_count ++] = curData; 4412 } 4413 } 4414 else{ 4415 type[index] = 0; 4416 pred = curData; 4417 unpredictable_data[block_unpredictable_count ++] = curData; 4418 } 4419 index ++; 4420 cur_data_pos ++; 4421 } 4422 /*dealing with the last jj (boundary)*/ 4423 { 4424 size_t jj = current_blockcount_y - 1; 4425 curData = *cur_data_pos; 4426 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4427 diff = curData - pred; 4428 itvNum = fabs(diff)/realPrecision + 1; 4429 if (itvNum < intvCapacity){ 4430 if (diff < 0) itvNum = -itvNum; 4431 type[index] = (int) (itvNum/2) + intvRadius; 4432 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4433 //ganrantee comporession error against the case of machine-epsilon 4434 if(fabs(curData - pred)>realPrecision){ 4435 type[index] = 0; 4436 pred = curData; 4437 unpredictable_data[block_unpredictable_count ++] = curData; 4438 } 4439 } 4440 else{ 4441 type[index] = 0; 4442 pred = curData; 4443 unpredictable_data[block_unpredictable_count ++] = curData; 4444 } 4445 4446 // assign value to block surfaces 4447 pb_pos[ii * strip_dim0_offset + jj] = pred; 4448 index ++; 4449 cur_data_pos ++; 4450 } 4451 cur_data_pos += dim0_offset - current_blockcount_y; 4452 } 4453 /*dealing with the last ii (boundary)*/ 4454 { 4455 size_t ii = current_blockcount_x - 1; 4456 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4457 curData = *cur_data_pos; 4458 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4459 diff = curData - pred; 4460 itvNum = fabs(diff)/realPrecision + 1; 4461 if (itvNum < intvCapacity){ 4462 if (diff < 0) itvNum = -itvNum; 4463 type[index] = (int) (itvNum/2) + intvRadius; 4464 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4465 //ganrantee comporession error against the case of machine-epsilon 4466 if(fabs(curData - pred)>realPrecision){ 4467 type[index] = 0; 4468 pred = curData; 4469 unpredictable_data[block_unpredictable_count ++] = curData; 4470 } 4471 } 4472 else{ 4473 type[index] = 0; 4474 pred = curData; 4475 unpredictable_data[block_unpredictable_count ++] = curData; 4476 } 4477 // assign value to next prediction buffer 4478 next_pb_pos[jj] = pred; 4479 index ++; 4480 cur_data_pos ++; 4481 } 4482 /*dealing with the last jj (boundary)*/ 4483 { 4484 size_t jj = current_blockcount_y - 1; 4485 curData = *cur_data_pos; 4486 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4487 diff = curData - pred; 4488 itvNum = fabs(diff)/realPrecision + 1; 4489 if (itvNum < intvCapacity){ 4490 if (diff < 0) itvNum = -itvNum; 4491 type[index] = (int) (itvNum/2) + intvRadius; 4492 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4493 //ganrantee comporession error against the case of machine-epsilon 4494 if(fabs(curData - pred)>realPrecision){ 4495 type[index] = 0; 4496 pred = curData; 4497 unpredictable_data[block_unpredictable_count ++] = curData; 4498 } 4499 } 4500 else{ 4501 type[index] = 0; 4502 pred = curData; 4503 unpredictable_data[block_unpredictable_count ++] = curData; 4504 } 4505 4506 // assign value to block surfaces 4507 pb_pos[ii * strip_dim0_offset + jj] = pred; 4508 // assign value to next prediction buffer 4509 next_pb_pos[jj] = pred; 4510 4511 index ++; 4512 cur_data_pos ++; 4513 } 4514 } // end ii == -1 4515 unpredictable_count = block_unpredictable_count; 4516 total_unpred += unpredictable_count; 4517 unpredictable_data += unpredictable_count; 4518 reg_count ++; 4519 }// end use_reg 4520 else{ 4521 // use SZ 4522 // SZ predication 4523 unpredictable_count = 0; 4524 float * cur_pb_pos = pb_pos; 4525 float * cur_data_pos = data_pos; 4526 float curData; 4527 float pred2D; 4528 double itvNum, diff; 4529 size_t index = 0; 4530 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4531 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4532 curData = *cur_data_pos; 4533 if(fabs(curData - mean) <= realPrecision){ 4534 // adjust type[index] to intvRadius for coherence with freq in reg 4535 type[index] = intvRadius; 4536 *cur_pb_pos = mean; 4537 } 4538 else 4539 { 4540 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4541 diff = curData - pred2D; 4542 itvNum = fabs(diff)/realPrecision + 1; 4543 if (itvNum < intvCapacity_sz){ 4544 if (diff < 0) itvNum = -itvNum; 4545 type[index] = (int) (itvNum/2) + intvRadius; 4546 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4547 if(type[index] <= intvRadius) type[index] -= 1; 4548 //ganrantee comporession error against the case of machine-epsilon 4549 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4550 type[index] = 0; 4551 *cur_pb_pos = curData; 4552 unpredictable_data[unpredictable_count ++] = curData; 4553 } 4554 } 4555 else{ 4556 type[index] = 0; 4557 *cur_pb_pos = curData; 4558 unpredictable_data[unpredictable_count ++] = curData; 4559 } 4560 } 4561 index ++; 4562 cur_pb_pos ++; 4563 cur_data_pos ++; 4564 } 4565 cur_pb_pos += strip_dim0_offset - current_blockcount_y; 4566 cur_data_pos += dim0_offset - current_blockcount_y; 4567 } 4568 /*dealing with the last ii (boundary)*/ 4569 { 4570 // ii == current_blockcount_x - 1 4571 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4572 curData = *cur_data_pos; 4573 if(fabs(curData - mean) <= realPrecision){ 4574 // adjust type[index] to intvRadius for coherence with freq in reg 4575 type[index] = intvRadius; 4576 *cur_pb_pos = mean; 4577 } 4578 else 4579 { 4580 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4581 diff = curData - pred2D; 4582 itvNum = fabs(diff)/realPrecision + 1; 4583 if (itvNum < intvCapacity_sz){ 4584 if (diff < 0) itvNum = -itvNum; 4585 type[index] = (int) (itvNum/2) + intvRadius; 4586 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4587 if(type[index] <= intvRadius) type[index] -= 1; 4588 //ganrantee comporession error against the case of machine-epsilon 4589 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4590 type[index] = 0; 4591 *cur_pb_pos = curData; 4592 unpredictable_data[unpredictable_count ++] = curData; 4593 } 4594 } 4595 else{ 4596 type[index] = 0; 4597 *cur_pb_pos = curData; 4598 unpredictable_data[unpredictable_count ++] = curData; 4599 } 4600 } 4601 next_pb_pos[jj] = *cur_pb_pos; 4602 index ++; 4603 cur_pb_pos ++; 4604 cur_data_pos ++; 4605 } 4606 } 4607 total_unpred += unpredictable_count; 4608 unpredictable_data += unpredictable_count; 4609 // change indicator 4610 indicator_pos[j] = 1; 4611 }// end SZ 4612 reg_params_pos ++; 4613 data_pos += current_blockcount_y; 4614 pb_pos += current_blockcount_y; 4615 next_pb_pos += current_blockcount_y; 4616 type += current_blockcount_x * current_blockcount_y; 4617 }// end j 4618 indicator_pos += num_y; 4619 float * tmp; 4620 tmp = cur_pb_buf; 4621 cur_pb_buf = next_pb_buf; 4622 next_pb_buf = tmp; 4623 }// end i 4624 }// end use mean 4625 else{ 4626 type = result_type; 4627 int intvCapacity_sz = intvCapacity - 2; 4628 for(size_t i=0; i<num_x; i++){ 4629 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4630 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4631 data_pos = oriData + offset_x * dim0_offset; 4632 4633 cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; 4634 next_pb_buf_pos = next_pb_buf + 1; 4635 float * pb_pos = cur_pb_buf_pos; 4636 float * next_pb_pos = next_pb_buf_pos; 4637 4638 for(size_t j=0; j<num_y; j++){ 4639 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4640 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4641 /*sampling*/ 4642 { 4643 // sample [2i + 1, 2i + 1] [2i + 1, bs - 2i] 4644 float * cur_data_pos; 4645 float curData; 4646 float pred_reg, pred_sz; 4647 float err_sz = 0.0, err_reg = 0.0; 4648 // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] 4649 // [1, 9] [3, 7] [7, 3] [9, 1] 4650 int count = 0; 4651 for(int i=1; i<current_blockcount_x; i+=2){ 4652 cur_data_pos = data_pos + i * dim0_offset + i; 4653 curData = *cur_data_pos; 4654 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4655 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; 4656 err_sz += fabs(pred_sz - curData); 4657 err_reg += fabs(pred_reg - curData); 4658 4659 cur_data_pos = data_pos + i * dim0_offset + (block_size - i); 4660 curData = *cur_data_pos; 4661 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4662 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; 4663 err_sz += fabs(pred_sz - curData); 4664 err_reg += fabs(pred_reg - curData); 4665 4666 count += 2; 4667 } 4668 err_sz += realPrecision * count * 0.81; 4669 use_reg = (err_reg < err_sz); 4670 4671 } 4672 if(use_reg) 4673 { 4674 { 4675 /*predict coefficients in current block via previous reg_block*/ 4676 float cur_coeff; 4677 double diff, itvNum; 4678 for(int e=0; e<3; e++){ 4679 cur_coeff = reg_params_pos[e*num_blocks]; 4680 diff = cur_coeff - last_coeffcients[e]; 4681 itvNum = fabs(diff)/precision[e] + 1; 4682 if (itvNum < coeff_intvCapacity_sz){ 4683 if (diff < 0) itvNum = -itvNum; 4684 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 4685 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 4686 //ganrantee comporession error against the case of machine-epsilon 4687 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 4688 coeff_type[e][coeff_index] = 0; 4689 last_coeffcients[e] = cur_coeff; 4690 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4691 } 4692 } 4693 else{ 4694 coeff_type[e][coeff_index] = 0; 4695 last_coeffcients[e] = cur_coeff; 4696 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4697 } 4698 } 4699 coeff_index ++; 4700 } 4701 float curData; 4702 float pred; 4703 double itvNum; 4704 double diff; 4705 size_t index = 0; 4706 size_t block_unpredictable_count = 0; 4707 float * cur_data_pos = data_pos; 4708 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4709 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4710 curData = *cur_data_pos; 4711 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4712 diff = curData - pred; 4713 itvNum = fabs(diff)/realPrecision + 1; 4714 if (itvNum < intvCapacity){ 4715 if (diff < 0) itvNum = -itvNum; 4716 type[index] = (int) (itvNum/2) + intvRadius; 4717 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4718 //ganrantee comporession error against the case of machine-epsilon 4719 if(fabs(curData - pred)>realPrecision){ 4720 type[index] = 0; 4721 pred = curData; 4722 unpredictable_data[block_unpredictable_count ++] = curData; 4723 } 4724 } 4725 else{ 4726 type[index] = 0; 4727 pred = curData; 4728 unpredictable_data[block_unpredictable_count ++] = curData; 4729 } 4730 index ++; 4731 cur_data_pos ++; 4732 } 4733 /*dealing with the last jj (boundary)*/ 4734 { 4735 // jj == current_blockcount_y - 1 4736 size_t jj = current_blockcount_y - 1; 4737 curData = *cur_data_pos; 4738 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4739 diff = curData - pred; 4740 itvNum = fabs(diff)/realPrecision + 1; 4741 if (itvNum < intvCapacity){ 4742 if (diff < 0) itvNum = -itvNum; 4743 type[index] = (int) (itvNum/2) + intvRadius; 4744 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4745 //ganrantee comporession error against the case of machine-epsilon 4746 if(fabs(curData - pred)>realPrecision){ 4747 type[index] = 0; 4748 pred = curData; 4749 unpredictable_data[block_unpredictable_count ++] = curData; 4750 } 4751 } 4752 else{ 4753 type[index] = 0; 4754 pred = curData; 4755 unpredictable_data[block_unpredictable_count ++] = curData; 4756 } 4757 4758 // assign value to block surfaces 4759 pb_pos[ii * strip_dim0_offset + jj] = pred; 4760 index ++; 4761 cur_data_pos ++; 4762 } 4763 cur_data_pos += dim0_offset - current_blockcount_y; 4764 } 4765 /*dealing with the last ii (boundary)*/ 4766 { 4767 size_t ii = current_blockcount_x - 1; 4768 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4769 curData = *cur_data_pos; 4770 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4771 diff = curData - pred; 4772 itvNum = fabs(diff)/realPrecision + 1; 4773 if (itvNum < intvCapacity){ 4774 if (diff < 0) itvNum = -itvNum; 4775 type[index] = (int) (itvNum/2) + intvRadius; 4776 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4777 //ganrantee comporession error against the case of machine-epsilon 4778 if(fabs(curData - pred)>realPrecision){ 4779 type[index] = 0; 4780 pred = curData; 4781 unpredictable_data[block_unpredictable_count ++] = curData; 4782 } 4783 } 4784 else{ 4785 type[index] = 0; 4786 pred = curData; 4787 unpredictable_data[block_unpredictable_count ++] = curData; 4788 } 4789 // assign value to next prediction buffer 4790 next_pb_pos[jj] = pred; 4791 index ++; 4792 cur_data_pos ++; 4793 } 4794 /*dealing with the last jj (boundary)*/ 4795 { 4796 // jj == current_blockcount_y - 1 4797 size_t jj = current_blockcount_y - 1; 4798 curData = *cur_data_pos; 4799 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4800 diff = curData - pred; 4801 itvNum = fabs(diff)/realPrecision + 1; 4802 if (itvNum < intvCapacity){ 4803 if (diff < 0) itvNum = -itvNum; 4804 type[index] = (int) (itvNum/2) + intvRadius; 4805 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4806 //ganrantee comporession error against the case of machine-epsilon 4807 if(fabs(curData - pred)>realPrecision){ 4808 type[index] = 0; 4809 pred = curData; 4810 unpredictable_data[block_unpredictable_count ++] = curData; 4811 } 4812 } 4813 else{ 4814 type[index] = 0; 4815 pred = curData; 4816 unpredictable_data[block_unpredictable_count ++] = curData; 4817 } 4818 4819 // assign value to block surfaces 4820 pb_pos[ii * strip_dim0_offset + jj] = pred; 4821 // assign value to next prediction buffer 4822 next_pb_pos[jj] = pred; 4823 4824 index ++; 4825 cur_data_pos ++; 4826 } 4827 } // end ii == -1 4828 unpredictable_count = block_unpredictable_count; 4829 total_unpred += unpredictable_count; 4830 unpredictable_data += unpredictable_count; 4831 reg_count ++; 4832 }// end use_reg 4833 else{ 4834 // use SZ 4835 // SZ predication 4836 unpredictable_count = 0; 4837 float * cur_pb_pos = pb_pos; 4838 float * cur_data_pos = data_pos; 4839 float curData; 4840 float pred2D; 4841 double itvNum, diff; 4842 size_t index = 0; 4843 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4844 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4845 curData = *cur_data_pos; 4846 4847 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4848 diff = curData - pred2D; 4849 itvNum = fabs(diff)/realPrecision + 1; 4850 if (itvNum < intvCapacity_sz){ 4851 if (diff < 0) itvNum = -itvNum; 4852 type[index] = (int) (itvNum/2) + intvRadius; 4853 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4854 //ganrantee comporession error against the case of machine-epsilon 4855 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4856 type[index] = 0; 4857 *cur_pb_pos = curData; 4858 unpredictable_data[unpredictable_count ++] = curData; 4859 } 4860 } 4861 else{ 4862 type[index] = 0; 4863 *cur_pb_pos = curData; 4864 unpredictable_data[unpredictable_count ++] = curData; 4865 } 4866 4867 index ++; 4868 cur_pb_pos ++; 4869 cur_data_pos ++; 4870 } 4871 cur_pb_pos += strip_dim0_offset - current_blockcount_y; 4872 cur_data_pos += dim0_offset - current_blockcount_y; 4873 } 4874 /*dealing with the last ii (boundary)*/ 4875 { 4876 // ii == current_blockcount_x - 1 4877 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4878 curData = *cur_data_pos; 4879 4880 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4881 diff = curData - pred2D; 4882 itvNum = fabs(diff)/realPrecision + 1; 4883 if (itvNum < intvCapacity_sz){ 4884 if (diff < 0) itvNum = -itvNum; 4885 type[index] = (int) (itvNum/2) + intvRadius; 4886 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4887 //ganrantee comporession error against the case of machine-epsilon 4888 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4889 type[index] = 0; 4890 *cur_pb_pos = curData; 4891 unpredictable_data[unpredictable_count ++] = curData; 4892 } 4893 } 4894 else{ 4895 type[index] = 0; 4896 *cur_pb_pos = curData; 4897 unpredictable_data[unpredictable_count ++] = curData; 4898 } 4899 next_pb_pos[jj] = *cur_pb_pos; 4900 index ++; 4901 cur_pb_pos ++; 4902 cur_data_pos ++; 4903 } 4904 } 4905 total_unpred += unpredictable_count; 4906 unpredictable_data += unpredictable_count; 4907 // change indicator 4908 indicator_pos[j] = 1; 4909 }// end SZ 4910 reg_params_pos ++; 4911 data_pos += current_blockcount_y; 4912 pb_pos += current_blockcount_y; 4913 next_pb_pos += current_blockcount_y; 4914 type += current_blockcount_x * current_blockcount_y; 4915 }// end j 4916 indicator_pos += num_y; 4917 float * tmp; 4918 tmp = cur_pb_buf; 4919 cur_pb_buf = next_pb_buf; 4920 next_pb_buf = tmp; 4921 }// end i 4922 } 4923 free(prediction_buffer_1); 4924 free(prediction_buffer_2); 4925 4926 int stateNum = 2*quantization_intervals; 4927 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 4928 4929 size_t nodeCount = 0; 4930 size_t i = 0; 4931 init(huffmanTree, result_type, num_elements); 4932 for (i = 0; i < stateNum; i++) 4933 if (huffmanTree->code[i]) nodeCount++; 4934 nodeCount = nodeCount*2-1; 4935 4936 unsigned char *treeBytes; 4937 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 4938 4939 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 4940 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 4941 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); 4942 unsigned char * result_pos = result; 4943 initRandomAccessBytes(result_pos); 4944 result_pos += meta_data_offset; 4945 4946 sizeToBytes(result_pos, num_elements); 4947 result_pos += exe_params->SZ_SIZE_TYPE; 4948 4949 intToBytes_bigEndian(result_pos, block_size); 4950 result_pos += sizeof(int); 4951 doubleToBytes(result_pos, realPrecision); 4952 result_pos += sizeof(double); 4953 intToBytes_bigEndian(result_pos, quantization_intervals); 4954 result_pos += sizeof(int); 4955 intToBytes_bigEndian(result_pos, treeByteSize); 4956 result_pos += sizeof(int); 4957 intToBytes_bigEndian(result_pos, nodeCount); 4958 result_pos += sizeof(int); 4959 memcpy(result_pos, treeBytes, treeByteSize); 4960 result_pos += treeByteSize; 4961 free(treeBytes); 4962 4963 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 4964 result_pos += sizeof(unsigned char); 4965 memcpy(result_pos, &mean, sizeof(float)); 4966 result_pos += sizeof(float); 4967 4968 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 4969 result_pos += indicator_size; 4970 4971 //convert the lead/mid/resi to byte stream 4972 if(reg_count>0){ 4973 for(int e=0; e<3; e++){ 4974 int stateNum = 2*coeff_intvCapacity_sz; 4975 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 4976 size_t nodeCount = 0; 4977 init(huffmanTree, coeff_type[e], reg_count); 4978 size_t i = 0; 4979 for (i = 0; i < huffmanTree->stateNum; i++) 4980 if (huffmanTree->code[i]) nodeCount++; 4981 nodeCount = nodeCount*2-1; 4982 unsigned char *treeBytes; 4983 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 4984 doubleToBytes(result_pos, precision[e]); 4985 result_pos += sizeof(double); 4986 intToBytes_bigEndian(result_pos, coeff_intvRadius); 4987 result_pos += sizeof(int); 4988 intToBytes_bigEndian(result_pos, treeByteSize); 4989 result_pos += sizeof(int); 4990 intToBytes_bigEndian(result_pos, nodeCount); 4991 result_pos += sizeof(int); 4992 memcpy(result_pos, treeBytes, treeByteSize); 4993 result_pos += treeByteSize; 4994 free(treeBytes); 4995 size_t typeArray_size = 0; 4996 encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); 4997 sizeToBytes(result_pos, typeArray_size); 4998 result_pos += sizeof(size_t) + typeArray_size; 4999 intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); 5000 result_pos += sizeof(int); 5001 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); 5002 result_pos += coeff_unpredictable_count[e]*sizeof(float); 5003 SZ_ReleaseHuffman(huffmanTree); 5004 } 5005 } 5006 free(coeff_result_type); 5007 free(coeff_unpredictable_data); 5008 5009 //record the number of unpredictable data and also store them 5010 memcpy(result_pos, &total_unpred, sizeof(size_t)); 5011 result_pos += sizeof(size_t); 5012 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); 5013 result_pos += total_unpred * sizeof(float); 5014 size_t typeArray_size = 0; 5015 encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); 5016 result_pos += typeArray_size; 5017 5018 size_t totalEncodeSize = result_pos - result; 5019 free(indicator); 5020 free(result_unpredictable_data); 5021 free(result_type); 5022 free(reg_params); 5023 5024 SZ_ReleaseHuffman(huffmanTree); 5025 *comp_size = totalEncodeSize; 5026 5027 return result; 5028 } 5029 5030 unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq) 5031 { 5032 float mean = 0.0; 5033 size_t len = r1 * r2 * r3; 5034 size_t mean_distance = (int) (sqrt(len)); 5035 float * data_pos = oriData; 5036 size_t offset_count = 0; 5037 size_t offset_count_2 = 0; 5038 size_t mean_count = 0; 5039 while(data_pos - oriData < len){ 5040 mean += *data_pos; 5041 mean_count ++; 5042 data_pos += mean_distance; 5043 offset_count += mean_distance; 5044 offset_count_2 += mean_distance; 5045 if(offset_count >= r3){ 5046 offset_count = 0; 5047 data_pos -= 1; 5048 } 5049 if(offset_count_2 >= r2 * r3){ 5050 offset_count_2 = 0; 5051 data_pos -= 1; 5052 } 5053 } 5054 if(mean_count > 0) mean /= mean_count; 5055 size_t range = 8192; 5056 size_t radius = 4096; 5057 size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); 5058 memset(freq_intervals, 0, range*sizeof(size_t)); 5059 5060 unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; 5061 int sampleDistance = confparams_cpr->sampleDistance; 5062 float predThreshold = confparams_cpr->predThreshold; 5063 5064 size_t i; 5065 size_t radiusIndex; 5066 size_t r23=r2*r3; 5067 float pred_value = 0, pred_err; 5068 size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); 5069 memset(intervals, 0, maxRangeRadius*sizeof(size_t)); 5070 5071 float mean_diff; 5072 ptrdiff_t freq_index; 5073 size_t freq_count = 0; 5074 size_t sample_count = 0; 5075 5076 offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset 5077 data_pos = oriData + r23 + r3 + offset_count; 5078 size_t n1_count = 1, n2_count = 1; // count i,j sum 5079 5080 while(data_pos - oriData < len){ 5081 5082 pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; 5083 pred_err = fabs(pred_value - *data_pos); 5084 if(pred_err < realPrecision) freq_count ++; 5085 radiusIndex = (pred_err/realPrecision+1)/2; 5086 if(radiusIndex>=maxRangeRadius) 5087 { 5088 radiusIndex = maxRangeRadius - 1; 5089 } 5090 intervals[radiusIndex]++; 5091 5092 mean_diff = *data_pos - mean; 5093 if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; 5094 else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; 5095 if(freq_index <= 0){ 5096 freq_intervals[0] ++; 5097 } 5098 else if(freq_index >= range){ 5099 freq_intervals[range - 1] ++; 5100 } 5101 else{ 5102 freq_intervals[freq_index] ++; 5103 } 5104 offset_count += sampleDistance; 5105 if(offset_count >= r3){ 5106 n2_count ++; 5107 if(n2_count == r2){ 5108 n1_count ++; 5109 n2_count = 1; 5110 data_pos += r3; 5111 } 5112 offset_count_2 = (n1_count + n2_count) % sampleDistance; 5113 data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); 5114 offset_count = (sampleDistance - offset_count_2); 5115 if(offset_count == 0) offset_count ++; 5116 } 5117 else data_pos += sampleDistance; 5118 sample_count ++; 5119 } 5120 *max_freq = freq_count * 1.0/ sample_count; 5121 5122 //compute the appropriate number 5123 size_t targetCount = sample_count*predThreshold; 5124 size_t sum = 0; 5125 for(i=0;i<maxRangeRadius;i++) 5126 { 5127 sum += intervals[i]; 5128 if(sum>targetCount) 5129 break; 5130 } 5131 if(i>=maxRangeRadius) 5132 i = maxRangeRadius-1; 5133 unsigned int accIntervals = 2*(i+1); 5134 unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); 5135 5136 if(powerOf2<32) 5137 powerOf2 = 32; 5138 // collect frequency 5139 size_t max_sum = 0; 5140 size_t max_index = 0; 5141 size_t tmp_sum; 5142 size_t * freq_pos = freq_intervals + 1; 5143 for(size_t i=1; i<range-2; i++){ 5144 tmp_sum = freq_pos[0] + freq_pos[1]; 5145 if(tmp_sum > max_sum){ 5146 max_sum = tmp_sum; 5147 max_index = i; 5148 } 5149 freq_pos ++; 5150 } 5151 *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); 5152 *mean_freq = max_sum * 1.0 / sample_count; 5153 5154 free(freq_intervals); 5155 free(intervals); 5156 return powerOf2; 5157 } 5158 5159 5160 // 3D: modified for higher performance 5161 unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ 5162 5163 #ifdef HAVE_TIMECMPR 5164 float* decData = NULL; 5165 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5166 decData = (float*)(multisteps->hist_data); 5167 #endif 5168 5169 unsigned int quantization_intervals; 5170 float sz_sample_correct_freq = -1;//0.5; //-1 5171 float dense_pos; 5172 float mean_flush_freq; 5173 unsigned char use_mean = 0; 5174 5175 // calculate block dims 5176 size_t num_x, num_y, num_z; 5177 size_t block_size = 6; 5178 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 5179 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 5180 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); 5181 5182 size_t split_index_x, split_index_y, split_index_z; 5183 size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; 5184 size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; 5185 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 5186 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 5187 SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); 5188 5189 size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; 5190 size_t num_blocks = num_x * num_y * num_z; 5191 size_t num_elements = r1 * r2 * r3; 5192 5193 size_t dim0_offset = r2 * r3; 5194 size_t dim1_offset = r3; 5195 5196 int * result_type = (int *) malloc(num_elements * sizeof(int)); 5197 size_t unpred_data_max_size = max_num_block_elements; 5198 float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); 5199 size_t total_unpred = 0; 5200 size_t unpredictable_count; 5201 size_t max_unpred_count = 0; 5202 float * data_pos = oriData; 5203 int * type = result_type; 5204 size_t type_offset; 5205 size_t offset_x, offset_y, offset_z; 5206 size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; 5207 5208 float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); 5209 float * reg_params_pos = reg_params; 5210 // move regression part out 5211 size_t params_offset_b = num_blocks; 5212 size_t params_offset_c = 2*num_blocks; 5213 size_t params_offset_d = 3*num_blocks; 5214 for(size_t i=0; i<num_x; i++){ 5215 for(size_t j=0; j<num_y; j++){ 5216 for(size_t k=0; k<num_z; k++){ 5217 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 5218 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 5219 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 5220 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 5221 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 5222 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 5223 5224 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 5225 /*Calculate regression coefficients*/ 5226 { 5227 float * cur_data_pos = data_pos; 5228 float fx = 0.0; 5229 float fy = 0.0; 5230 float fz = 0.0; 5231 float f = 0; 5232 float sum_x, sum_y; 5233 float curData; 5234 for(size_t i=0; i<current_blockcount_x; i++){ 5235 sum_x = 0; 5236 for(size_t j=0; j<current_blockcount_y; j++){ 5237 sum_y = 0; 5238 for(size_t k=0; k<current_blockcount_z; k++){ 5239 curData = *cur_data_pos; 5240 // f += curData; 5241 // fx += curData * i; 5242 // fy += curData * j; 5243 // fz += curData * k; 5244 sum_y += curData; 5245 fz += curData * k; 5246 cur_data_pos ++; 5247 } 5248 fy += sum_y * j; 5249 sum_x += sum_y; 5250 cur_data_pos += dim1_offset - current_blockcount_z; 5251 } 5252 fx += sum_x * i; 5253 f += sum_x; 5254 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5255 } 5256 float coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z); 5257 reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); 5258 reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); 5259 reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1); 5260 reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2); 5261 } 5262 reg_params_pos ++; 5263 } 5264 } 5265 } 5266 5267 //Compress coefficient arrays 5268 double precision_a, precision_b, precision_c, precision_d; 5269 float rel_param_err = 0.025; 5270 precision_a = rel_param_err * realPrecision / late_blockcount_x; 5271 precision_b = rel_param_err * realPrecision / late_blockcount_y; 5272 precision_c = rel_param_err * realPrecision / late_blockcount_z; 5273 precision_d = rel_param_err * realPrecision; 5274 5275 if(exe_params->optQuantMode==1) 5276 { 5277 quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 5278 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 5279 updateQuantizationInfo(quantization_intervals); 5280 } 5281 else{ 5282 quantization_intervals = exe_params->intvCapacity; 5283 } 5284 5285 float mean = 0; 5286 if(use_mean){ 5287 // compute mean 5288 double sum = 0.0; 5289 size_t mean_count = 0; 5290 for(size_t i=0; i<num_elements; i++){ 5291 if(fabs(oriData[i] - dense_pos) < realPrecision){ 5292 sum += oriData[i]; 5293 mean_count ++; 5294 } 5295 } 5296 if(mean_count > 0) mean = sum / mean_count; 5297 } 5298 5299 double tmp_realPrecision = realPrecision; 5300 5301 // use two prediction buffers for higher performance 5302 float * unpredictable_data = result_unpredictable_data; 5303 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 5304 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 5305 size_t reg_count = 0; 5306 size_t strip_dim_0 = early_blockcount_x + 1; 5307 size_t strip_dim_1 = r2 + 1; 5308 size_t strip_dim_2 = r3 + 1; 5309 size_t strip_dim0_offset = strip_dim_1 * strip_dim_2; 5310 size_t strip_dim1_offset = strip_dim_2; 5311 unsigned char * indicator_pos = indicator; 5312 5313 size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float); 5314 float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size); 5315 memset(prediction_buffer_1, 0, prediction_buffer_size); 5316 float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size); 5317 memset(prediction_buffer_2, 0, prediction_buffer_size); 5318 float * cur_pb_buf = prediction_buffer_1; 5319 float * next_pb_buf = prediction_buffer_2; 5320 float * cur_pb_buf_pos; 5321 float * next_pb_buf_pos; 5322 int intvCapacity = exe_params->intvCapacity; 5323 int intvRadius = exe_params->intvRadius; 5324 int use_reg = 0; 5325 float noise = realPrecision * 1.22; 5326 5327 reg_params_pos = reg_params; 5328 // compress the regression coefficients on the fly 5329 float last_coeffcients[4] = {0.0}; 5330 int coeff_intvCapacity_sz = 65536; 5331 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 5332 int * coeff_type[4]; 5333 int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); 5334 float * coeff_unpred_data[4]; 5335 float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float)); 5336 double precision[4]; 5337 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; 5338 for(int i=0; i<4; i++){ 5339 coeff_type[i] = coeff_result_type + i * num_blocks; 5340 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 5341 } 5342 int coeff_index = 0; 5343 unsigned int coeff_unpredictable_count[4] = {0}; 5344 5345 if(use_mean){ 5346 int intvCapacity_sz = intvCapacity - 2; 5347 for(size_t i=0; i<num_x; i++){ 5348 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 5349 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 5350 for(size_t j=0; j<num_y; j++){ 5351 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 5352 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 5353 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; 5354 type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; 5355 type = result_type + type_offset; 5356 5357 // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) 5358 cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; 5359 next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; 5360 5361 size_t current_blockcount_z; 5362 float * pb_pos = cur_pb_buf_pos; 5363 float * next_pb_pos = next_pb_buf_pos; 5364 size_t strip_unpredictable_count = 0; 5365 for(size_t k=0; k<num_z; k++){ 5366 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 5367 #ifdef HAVE_TIMECMPR 5368 size_t offset_z = 0; 5369 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 5370 size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 5371 #endif 5372 /*sampling and decide which predictor*/ 5373 { 5374 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 5375 float * cur_data_pos; 5376 float curData; 5377 float pred_reg, pred_sz; 5378 float err_sz = 0.0, err_reg = 0.0; 5379 int bmi = 0; 5380 if(i>0 && j>0 && k>0){ 5381 for(int i=0; i<block_size; i++){ 5382 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 5383 curData = *cur_data_pos; 5384 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5385 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5386 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5387 err_reg += fabs(pred_reg - curData); 5388 5389 bmi = block_size - i; 5390 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 5391 curData = *cur_data_pos; 5392 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5393 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5394 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5395 err_reg += fabs(pred_reg - curData); 5396 5397 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 5398 curData = *cur_data_pos; 5399 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5400 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5401 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5402 err_reg += fabs(pred_reg - curData); 5403 5404 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 5405 curData = *cur_data_pos; 5406 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5407 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5408 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5409 err_reg += fabs(pred_reg - curData); 5410 } 5411 } 5412 else{ 5413 for(int i=1; i<block_size; i++){ 5414 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 5415 curData = *cur_data_pos; 5416 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5417 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5418 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5419 err_reg += fabs(pred_reg - curData); 5420 5421 bmi = block_size - i; 5422 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 5423 curData = *cur_data_pos; 5424 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5425 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5426 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5427 err_reg += fabs(pred_reg - curData); 5428 5429 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 5430 curData = *cur_data_pos; 5431 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5432 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5433 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5434 err_reg += fabs(pred_reg - curData); 5435 5436 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 5437 curData = *cur_data_pos; 5438 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5439 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5440 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5441 err_reg += fabs(pred_reg - curData); 5442 5443 } 5444 } 5445 use_reg = (err_reg < err_sz); 5446 } 5447 if(use_reg){ 5448 { 5449 /*predict coefficients in current block via previous reg_block*/ 5450 float cur_coeff; 5451 double diff, itvNum; 5452 for(int e=0; e<4; e++){ 5453 cur_coeff = reg_params_pos[e*num_blocks]; 5454 diff = cur_coeff - last_coeffcients[e]; 5455 itvNum = fabs(diff)/precision[e] + 1; 5456 if (itvNum < coeff_intvCapacity_sz){ 5457 if (diff < 0) itvNum = -itvNum; 5458 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 5459 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 5460 //ganrantee comporession error against the case of machine-epsilon 5461 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 5462 coeff_type[e][coeff_index] = 0; 5463 last_coeffcients[e] = cur_coeff; 5464 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5465 } 5466 } 5467 else{ 5468 coeff_type[e][coeff_index] = 0; 5469 last_coeffcients[e] = cur_coeff; 5470 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5471 } 5472 } 5473 coeff_index ++; 5474 } 5475 float curData; 5476 float pred; 5477 double itvNum; 5478 double diff; 5479 size_t index = 0; 5480 size_t block_unpredictable_count = 0; 5481 float * cur_data_pos = data_pos; 5482 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5483 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5484 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5485 curData = *cur_data_pos; 5486 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5487 diff = curData - pred; 5488 itvNum = fabs(diff)/tmp_realPrecision + 1; 5489 if (itvNum < intvCapacity){ 5490 if (diff < 0) itvNum = -itvNum; 5491 type[index] = (int) (itvNum/2) + intvRadius; 5492 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5493 //ganrantee comporession error against the case of machine-epsilon 5494 if(fabs(curData - pred)>tmp_realPrecision){ 5495 type[index] = 0; 5496 pred = curData; 5497 unpredictable_data[block_unpredictable_count ++] = curData; 5498 } 5499 } 5500 else{ 5501 type[index] = 0; 5502 pred = curData; 5503 unpredictable_data[block_unpredictable_count ++] = curData; 5504 } 5505 5506 #ifdef HAVE_TIMECMPR 5507 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5508 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5509 decData[block_offset + point_offset] = pred; 5510 #endif 5511 5512 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5513 // assign value to block surfaces 5514 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5515 } 5516 index ++; 5517 cur_data_pos ++; 5518 } 5519 cur_data_pos += dim1_offset - current_blockcount_z; 5520 } 5521 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5522 } 5523 /*dealing with the last ii (boundary)*/ 5524 { 5525 // ii == current_blockcount_x - 1 5526 size_t ii = current_blockcount_x - 1; 5527 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5528 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5529 curData = *cur_data_pos; 5530 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5531 diff = curData - pred; 5532 itvNum = fabs(diff)/tmp_realPrecision + 1; 5533 if (itvNum < intvCapacity){ 5534 if (diff < 0) itvNum = -itvNum; 5535 type[index] = (int) (itvNum/2) + intvRadius; 5536 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5537 //ganrantee comporession error against the case of machine-epsilon 5538 if(fabs(curData - pred)>tmp_realPrecision){ 5539 type[index] = 0; 5540 pred = curData; 5541 unpredictable_data[block_unpredictable_count ++] = curData; 5542 } 5543 } 5544 else{ 5545 type[index] = 0; 5546 pred = curData; 5547 unpredictable_data[block_unpredictable_count ++] = curData; 5548 } 5549 5550 #ifdef HAVE_TIMECMPR 5551 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5552 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5553 decData[block_offset + point_offset] = pred; 5554 #endif 5555 5556 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5557 // assign value to block surfaces 5558 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5559 } 5560 // assign value to next prediction buffer 5561 next_pb_pos[jj * strip_dim1_offset + kk] = pred; 5562 index ++; 5563 cur_data_pos ++; 5564 } 5565 cur_data_pos += dim1_offset - current_blockcount_z; 5566 } 5567 } 5568 unpredictable_count = block_unpredictable_count; 5569 strip_unpredictable_count += unpredictable_count; 5570 unpredictable_data += unpredictable_count; 5571 5572 reg_count ++; 5573 } 5574 else{ 5575 // use SZ 5576 // SZ predication 5577 unpredictable_count = 0; 5578 float * cur_pb_pos = pb_pos; 5579 float * cur_data_pos = data_pos; 5580 float curData; 5581 float pred3D; 5582 double itvNum, diff; 5583 size_t index = 0; 5584 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5585 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5586 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5587 5588 curData = *cur_data_pos; 5589 if(fabs(curData - mean) <= realPrecision){ 5590 // adjust type[index] to intvRadius for coherence with freq in reg 5591 type[index] = intvRadius; 5592 *cur_pb_pos = mean; 5593 } 5594 else 5595 { 5596 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 5597 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5598 diff = curData - pred3D; 5599 itvNum = fabs(diff)/realPrecision + 1; 5600 if (itvNum < intvCapacity_sz){ 5601 if (diff < 0) itvNum = -itvNum; 5602 type[index] = (int) (itvNum/2) + intvRadius; 5603 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5604 if(type[index] <= intvRadius) type[index] -= 1; 5605 //ganrantee comporession error against the case of machine-epsilon 5606 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5607 type[index] = 0; 5608 *cur_pb_pos = curData; 5609 unpredictable_data[unpredictable_count ++] = curData; 5610 } 5611 } 5612 else{ 5613 type[index] = 0; 5614 *cur_pb_pos = curData; 5615 unpredictable_data[unpredictable_count ++] = curData; 5616 } 5617 } 5618 #ifdef HAVE_TIMECMPR 5619 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5620 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5621 decData[block_offset + point_offset] = *cur_pb_pos; 5622 #endif 5623 5624 index ++; 5625 cur_pb_pos ++; 5626 cur_data_pos ++; 5627 } 5628 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5629 cur_data_pos += dim1_offset - current_blockcount_z; 5630 } 5631 cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; 5632 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5633 } 5634 /*dealing with the last ii (boundary)*/ 5635 { 5636 // ii == current_blockcount_x - 1 5637 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5638 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5639 5640 curData = *cur_data_pos; 5641 if(fabs(curData - mean) <= realPrecision){ 5642 // adjust type[index] to intvRadius for coherence with freq in reg 5643 type[index] = intvRadius; 5644 *cur_pb_pos = mean; 5645 } 5646 else 5647 { 5648 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 5649 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5650 diff = curData - pred3D; 5651 itvNum = fabs(diff)/realPrecision + 1; 5652 if (itvNum < intvCapacity_sz){ 5653 if (diff < 0) itvNum = -itvNum; 5654 type[index] = (int) (itvNum/2) + intvRadius; 5655 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5656 if(type[index] <= intvRadius) type[index] -= 1; 5657 //ganrantee comporession error against the case of machine-epsilon 5658 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5659 type[index] = 0; 5660 *cur_pb_pos = curData; 5661 unpredictable_data[unpredictable_count ++] = curData; 5662 } 5663 } 5664 else{ 5665 type[index] = 0; 5666 *cur_pb_pos = curData; 5667 unpredictable_data[unpredictable_count ++] = curData; 5668 } 5669 } 5670 #ifdef HAVE_TIMECMPR 5671 size_t ii = current_blockcount_x - 1; 5672 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5673 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5674 decData[block_offset + point_offset] = *cur_pb_pos; 5675 #endif 5676 5677 next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; 5678 index ++; 5679 cur_pb_pos ++; 5680 cur_data_pos ++; 5681 } 5682 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5683 cur_data_pos += dim1_offset - current_blockcount_z; 5684 } 5685 } 5686 strip_unpredictable_count += unpredictable_count; 5687 unpredictable_data += unpredictable_count; 5688 // change indicator 5689 indicator_pos[k] = 1; 5690 }// end SZ 5691 5692 reg_params_pos ++; 5693 data_pos += current_blockcount_z; 5694 pb_pos += current_blockcount_z; 5695 next_pb_pos += current_blockcount_z; 5696 type += current_blockcount_x * current_blockcount_y * current_blockcount_z; 5697 5698 } // end k 5699 5700 if(strip_unpredictable_count > max_unpred_count){ 5701 max_unpred_count = strip_unpredictable_count; 5702 } 5703 total_unpred += strip_unpredictable_count; 5704 indicator_pos += num_z; 5705 }// end j 5706 float * tmp; 5707 tmp = cur_pb_buf; 5708 cur_pb_buf = next_pb_buf; 5709 next_pb_buf = tmp; 5710 }// end i 5711 } 5712 else{ 5713 int intvCapacity_sz = intvCapacity - 2; 5714 for(size_t i=0; i<num_x; i++){ 5715 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 5716 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 5717 5718 for(size_t j=0; j<num_y; j++){ 5719 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 5720 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 5721 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; 5722 // copy bottom plane from plane buffer 5723 // memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(float)); 5724 type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; 5725 type = result_type + type_offset; 5726 5727 // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) 5728 cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; 5729 next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; 5730 5731 size_t current_blockcount_z; 5732 float * pb_pos = cur_pb_buf_pos; 5733 float * next_pb_pos = next_pb_buf_pos; 5734 size_t strip_unpredictable_count = 0; 5735 for(size_t k=0; k<num_z; k++){ 5736 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 5737 #ifdef HAVE_TIMECMPR 5738 size_t offset_z = 0; 5739 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 5740 size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 5741 #endif 5742 /*sampling*/ 5743 { 5744 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 5745 float * cur_data_pos; 5746 float curData; 5747 float pred_reg, pred_sz; 5748 float err_sz = 0.0, err_reg = 0.0; 5749 int bmi; 5750 if(i>0 && j>0 && k>0){ 5751 for(int i=0; i<block_size; i++){ 5752 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 5753 curData = *cur_data_pos; 5754 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5755 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5756 err_sz += fabs(pred_sz - curData) + noise; 5757 err_reg += fabs(pred_reg - curData); 5758 5759 bmi = block_size - i; 5760 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 5761 curData = *cur_data_pos; 5762 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5763 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5764 err_sz += fabs(pred_sz - curData) + noise; 5765 err_reg += fabs(pred_reg - curData); 5766 5767 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 5768 curData = *cur_data_pos; 5769 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5770 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5771 err_sz += fabs(pred_sz - curData) + noise; 5772 err_reg += fabs(pred_reg - curData); 5773 5774 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 5775 curData = *cur_data_pos; 5776 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5777 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5778 err_sz += fabs(pred_sz - curData) + noise; 5779 err_reg += fabs(pred_reg - curData); 5780 } 5781 } 5782 else{ 5783 for(int i=1; i<block_size; i++){ 5784 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 5785 curData = *cur_data_pos; 5786 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5787 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5788 err_sz += fabs(pred_sz - curData) + noise; 5789 err_reg += fabs(pred_reg - curData); 5790 5791 bmi = block_size - i; 5792 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 5793 curData = *cur_data_pos; 5794 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5795 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5796 err_sz += fabs(pred_sz - curData) + noise; 5797 err_reg += fabs(pred_reg - curData); 5798 5799 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 5800 curData = *cur_data_pos; 5801 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5802 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5803 err_sz += fabs(pred_sz - curData) + noise; 5804 err_reg += fabs(pred_reg - curData); 5805 5806 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 5807 curData = *cur_data_pos; 5808 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5809 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5810 err_sz += fabs(pred_sz - curData) + noise; 5811 err_reg += fabs(pred_reg - curData); 5812 } 5813 } 5814 use_reg = (err_reg < err_sz); 5815 5816 } 5817 if(use_reg) 5818 { 5819 { 5820 /*predict coefficients in current block via previous reg_block*/ 5821 float cur_coeff; 5822 double diff, itvNum; 5823 for(int e=0; e<4; e++){ 5824 cur_coeff = reg_params_pos[e*num_blocks]; 5825 diff = cur_coeff - last_coeffcients[e]; 5826 itvNum = fabs(diff)/precision[e] + 1; 5827 if (itvNum < coeff_intvCapacity_sz){ 5828 if (diff < 0) itvNum = -itvNum; 5829 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 5830 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 5831 //ganrantee comporession error against the case of machine-epsilon 5832 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 5833 coeff_type[e][coeff_index] = 0; 5834 last_coeffcients[e] = cur_coeff; 5835 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5836 } 5837 } 5838 else{ 5839 coeff_type[e][coeff_index] = 0; 5840 last_coeffcients[e] = cur_coeff; 5841 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5842 } 5843 } 5844 coeff_index ++; 5845 } 5846 float curData; 5847 float pred; 5848 double itvNum; 5849 double diff; 5850 size_t index = 0; 5851 size_t block_unpredictable_count = 0; 5852 float * cur_data_pos = data_pos; 5853 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5854 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5855 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5856 5857 curData = *cur_data_pos; 5858 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5859 diff = curData - pred; 5860 itvNum = fabs(diff)/tmp_realPrecision + 1; 5861 if (itvNum < intvCapacity){ 5862 if (diff < 0) itvNum = -itvNum; 5863 type[index] = (int) (itvNum/2) + intvRadius; 5864 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5865 //ganrantee comporession error against the case of machine-epsilon 5866 if(fabs(curData - pred)>tmp_realPrecision){ 5867 type[index] = 0; 5868 pred = curData; 5869 unpredictable_data[block_unpredictable_count ++] = curData; 5870 } 5871 } 5872 else{ 5873 type[index] = 0; 5874 pred = curData; 5875 unpredictable_data[block_unpredictable_count ++] = curData; 5876 } 5877 5878 #ifdef HAVE_TIMECMPR 5879 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5880 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5881 decData[block_offset + point_offset] = pred; 5882 #endif 5883 5884 5885 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5886 // assign value to block surfaces 5887 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5888 } 5889 index ++; 5890 cur_data_pos ++; 5891 } 5892 cur_data_pos += dim1_offset - current_blockcount_z; 5893 } 5894 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5895 } 5896 /*dealing with the last ii (boundary)*/ 5897 { 5898 // ii == current_blockcount_x - 1 5899 size_t ii = current_blockcount_x - 1; 5900 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5901 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5902 curData = *cur_data_pos; 5903 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5904 diff = curData - pred; 5905 itvNum = fabs(diff)/tmp_realPrecision + 1; 5906 if (itvNum < intvCapacity){ 5907 if (diff < 0) itvNum = -itvNum; 5908 type[index] = (int) (itvNum/2) + intvRadius; 5909 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5910 //ganrantee comporession error against the case of machine-epsilon 5911 if(fabs(curData - pred)>tmp_realPrecision){ 5912 type[index] = 0; 5913 pred = curData; 5914 unpredictable_data[block_unpredictable_count ++] = curData; 5915 } 5916 } 5917 else{ 5918 type[index] = 0; 5919 pred = curData; 5920 unpredictable_data[block_unpredictable_count ++] = curData; 5921 } 5922 5923 #ifdef HAVE_TIMECMPR 5924 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5925 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5926 decData[block_offset + point_offset] = pred; 5927 #endif 5928 5929 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5930 // assign value to block surfaces 5931 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5932 } 5933 // assign value to next prediction buffer 5934 next_pb_pos[jj * strip_dim1_offset + kk] = pred; 5935 index ++; 5936 cur_data_pos ++; 5937 } 5938 cur_data_pos += dim1_offset - current_blockcount_z; 5939 } 5940 } 5941 unpredictable_count = block_unpredictable_count; 5942 strip_unpredictable_count += unpredictable_count; 5943 unpredictable_data += unpredictable_count; 5944 reg_count ++; 5945 } 5946 else{ 5947 // use SZ 5948 // SZ predication 5949 unpredictable_count = 0; 5950 float * cur_pb_pos = pb_pos; 5951 float * cur_data_pos = data_pos; 5952 float curData; 5953 float pred3D; 5954 double itvNum, diff; 5955 size_t index = 0; 5956 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5957 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5958 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5959 5960 curData = *cur_data_pos; 5961 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 5962 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5963 diff = curData - pred3D; 5964 itvNum = fabs(diff)/realPrecision + 1; 5965 if (itvNum < intvCapacity_sz){ 5966 if (diff < 0) itvNum = -itvNum; 5967 type[index] = (int) (itvNum/2) + intvRadius; 5968 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5969 //ganrantee comporession error against the case of machine-epsilon 5970 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5971 type[index] = 0; 5972 *cur_pb_pos = curData; 5973 unpredictable_data[unpredictable_count ++] = curData; 5974 } 5975 } 5976 else{ 5977 type[index] = 0; 5978 *cur_pb_pos = curData; 5979 unpredictable_data[unpredictable_count ++] = curData; 5980 } 5981 5982 #ifdef HAVE_TIMECMPR 5983 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5984 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5985 decData[block_offset + point_offset] = *cur_pb_pos; 5986 #endif 5987 index ++; 5988 cur_pb_pos ++; 5989 cur_data_pos ++; 5990 } 5991 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5992 cur_data_pos += dim1_offset - current_blockcount_z; 5993 } 5994 cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; 5995 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5996 } 5997 /*dealing with the last ii (boundary)*/ 5998 { 5999 // ii == current_blockcount_x - 1 6000 for(size_t jj=0; jj<current_blockcount_y; jj++){ 6001 for(size_t kk=0; kk<current_blockcount_z; kk++){ 6002 6003 curData = *cur_data_pos; 6004 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 6005 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6006 diff = curData - pred3D; 6007 itvNum = fabs(diff)/realPrecision + 1; 6008 if (itvNum < intvCapacity_sz){ 6009 if (diff < 0) itvNum = -itvNum; 6010 type[index] = (int) (itvNum/2) + intvRadius; 6011 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6012 //ganrantee comporession error against the case of machine-epsilon 6013 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 6014 type[index] = 0; 6015 *cur_pb_pos = curData; 6016 unpredictable_data[unpredictable_count ++] = curData; 6017 } 6018 } 6019 else{ 6020 type[index] = 0; 6021 *cur_pb_pos = curData; 6022 unpredictable_data[unpredictable_count ++] = curData; 6023 } 6024 6025 #ifdef HAVE_TIMECMPR 6026 size_t ii = current_blockcount_x - 1; 6027 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 6028 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 6029 decData[block_offset + point_offset] = *cur_pb_pos; 6030 #endif 6031 6032 // assign value to next prediction buffer 6033 next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; 6034 index ++; 6035 cur_pb_pos ++; 6036 cur_data_pos ++; 6037 } 6038 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 6039 cur_data_pos += dim1_offset - current_blockcount_z; 6040 } 6041 } 6042 strip_unpredictable_count += unpredictable_count; 6043 unpredictable_data += unpredictable_count; 6044 // change indicator 6045 indicator_pos[k] = 1; 6046 }// end SZ 6047 6048 reg_params_pos ++; 6049 data_pos += current_blockcount_z; 6050 pb_pos += current_blockcount_z; 6051 next_pb_pos += current_blockcount_z; 6052 type += current_blockcount_x * current_blockcount_y * current_blockcount_z; 6053 6054 } 6055 6056 if(strip_unpredictable_count > max_unpred_count){ 6057 max_unpred_count = strip_unpredictable_count; 6058 } 6059 total_unpred += strip_unpredictable_count; 6060 indicator_pos += num_z; 6061 } 6062 float * tmp; 6063 tmp = cur_pb_buf; 6064 cur_pb_buf = next_pb_buf; 6065 next_pb_buf = tmp; 6066 } 6067 } 6068 6069 free(prediction_buffer_1); 6070 free(prediction_buffer_2); 6071 6072 int stateNum = 2*quantization_intervals; 6073 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 6074 6075 size_t nodeCount = 0; 6076 init(huffmanTree, result_type, num_elements); 6077 size_t i = 0; 6078 for (i = 0; i < huffmanTree->stateNum; i++) 6079 if (huffmanTree->code[i]) nodeCount++; 6080 nodeCount = nodeCount*2-1; 6081 6082 unsigned char *treeBytes; 6083 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 6084 6085 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 6086 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 6087 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); 6088 unsigned char * result_pos = result; 6089 initRandomAccessBytes(result_pos); 6090 6091 result_pos += meta_data_offset; 6092 6093 sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 6094 result_pos += exe_params->SZ_SIZE_TYPE; 6095 6096 intToBytes_bigEndian(result_pos, block_size); 6097 result_pos += sizeof(int); 6098 doubleToBytes(result_pos, realPrecision); 6099 result_pos += sizeof(double); 6100 intToBytes_bigEndian(result_pos, quantization_intervals); 6101 result_pos += sizeof(int); 6102 intToBytes_bigEndian(result_pos, treeByteSize); 6103 result_pos += sizeof(int); 6104 intToBytes_bigEndian(result_pos, nodeCount); 6105 result_pos += sizeof(int); 6106 memcpy(result_pos, treeBytes, treeByteSize); 6107 result_pos += treeByteSize; 6108 free(treeBytes); 6109 6110 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 6111 result_pos += sizeof(unsigned char); 6112 memcpy(result_pos, &mean, sizeof(float)); 6113 result_pos += sizeof(float); 6114 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 6115 result_pos += indicator_size; 6116 6117 //convert the lead/mid/resi to byte stream 6118 if(reg_count > 0){ 6119 for(int e=0; e<4; e++){ 6120 int stateNum = 2*coeff_intvCapacity_sz; 6121 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 6122 size_t nodeCount = 0; 6123 init(huffmanTree, coeff_type[e], reg_count); 6124 size_t i = 0; 6125 for (i = 0; i < huffmanTree->stateNum; i++) 6126 if (huffmanTree->code[i]) nodeCount++; 6127 nodeCount = nodeCount*2-1; 6128 unsigned char *treeBytes; 6129 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 6130 doubleToBytes(result_pos, precision[e]); 6131 result_pos += sizeof(double); 6132 intToBytes_bigEndian(result_pos, coeff_intvRadius); 6133 result_pos += sizeof(int); 6134 intToBytes_bigEndian(result_pos, treeByteSize); 6135 result_pos += sizeof(int); 6136 intToBytes_bigEndian(result_pos, nodeCount); 6137 result_pos += sizeof(int); 6138 memcpy(result_pos, treeBytes, treeByteSize); 6139 result_pos += treeByteSize; 6140 free(treeBytes); 6141 size_t typeArray_size = 0; 6142 encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); 6143 sizeToBytes(result_pos, typeArray_size); 6144 result_pos += sizeof(size_t) + typeArray_size; 6145 intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); 6146 result_pos += sizeof(int); 6147 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); 6148 result_pos += coeff_unpredictable_count[e]*sizeof(float); 6149 SZ_ReleaseHuffman(huffmanTree); 6150 } 6151 } 6152 free(coeff_result_type); 6153 free(coeff_unpredictable_data); 6154 6155 //record the number of unpredictable data and also store them 6156 memcpy(result_pos, &total_unpred, sizeof(size_t)); 6157 result_pos += sizeof(size_t); 6158 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); 6159 result_pos += total_unpred * sizeof(float); 6160 size_t typeArray_size = 0; 6161 encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); 6162 result_pos += typeArray_size; 6163 size_t totalEncodeSize = result_pos - result; 6164 free(indicator); 6165 free(result_unpredictable_data); 6166 free(result_type); 6167 free(reg_params); 6168 6169 6170 SZ_ReleaseHuffman(huffmanTree); 6171 *comp_size = totalEncodeSize; 6172 return result; 6173 } 6174 6175 unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ 6176 6177 unsigned int quantization_intervals; 6178 float sz_sample_correct_freq = -1;//0.5; //-1 6179 float dense_pos; 6180 float mean_flush_freq; 6181 unsigned char use_mean = 0; 6182 6183 // calculate block dims 6184 size_t num_x, num_y, num_z; 6185 size_t block_size = 6; 6186 num_x = (r1 - 1) / block_size + 1; 6187 num_y = (r2 - 1) / block_size + 1; 6188 num_z = (r3 - 1) / block_size + 1; 6189 6190 size_t max_num_block_elements = block_size * block_size * block_size; 6191 size_t num_blocks = num_x * num_y * num_z; 6192 size_t num_elements = r1 * r2 * r3; 6193 6194 size_t dim0_offset = r2 * r3; 6195 size_t dim1_offset = r3; 6196 6197 int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int)); 6198 size_t unpred_data_max_size = max_num_block_elements; 6199 float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); 6200 size_t total_unpred = 0; 6201 size_t unpredictable_count; 6202 float * data_pos = oriData; 6203 int * type = result_type; 6204 float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); 6205 float * reg_params_pos = reg_params; 6206 // move regression part out 6207 size_t params_offset_b = num_blocks; 6208 size_t params_offset_c = 2*num_blocks; 6209 size_t params_offset_d = 3*num_blocks; 6210 float * pred_buffer = (float *) malloc((block_size+1)*(block_size+1)*(block_size+1)*sizeof(float)); 6211 float * pred_buffer_pos = NULL; 6212 float * block_data_pos_x = NULL; 6213 float * block_data_pos_y = NULL; 6214 float * block_data_pos_z = NULL; 6215 for(size_t i=0; i<num_x; i++){ 6216 for(size_t j=0; j<num_y; j++){ 6217 for(size_t k=0; k<num_z; k++){ 6218 data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; 6219 pred_buffer_pos = pred_buffer; 6220 block_data_pos_x = data_pos; 6221 // use the buffer as block_size*block_size*block_size 6222 for(int ii=0; ii<block_size; ii++){ 6223 block_data_pos_y = block_data_pos_x; 6224 for(int jj=0; jj<block_size; jj++){ 6225 block_data_pos_z = block_data_pos_y; 6226 for(int kk=0; kk<block_size; kk++){ 6227 *pred_buffer_pos = *block_data_pos_z; 6228 if(k*block_size + kk + 1 < r3) block_data_pos_z ++; 6229 pred_buffer_pos ++; 6230 } 6231 if(j*block_size + jj + 1 < r2) block_data_pos_y += dim1_offset; 6232 } 6233 if(i*block_size + ii + 1 < r1) block_data_pos_x += dim0_offset; 6234 } 6235 /*Calculate regression coefficients*/ 6236 { 6237 float * cur_data_pos = pred_buffer; 6238 float fx = 0.0; 6239 float fy = 0.0; 6240 float fz = 0.0; 6241 float f = 0; 6242 float sum_x, sum_y; 6243 float curData; 6244 for(size_t i=0; i<block_size; i++){ 6245 sum_x = 0; 6246 for(size_t j=0; j<block_size; j++){ 6247 sum_y = 0; 6248 for(size_t k=0; k<block_size; k++){ 6249 curData = *cur_data_pos; 6250 sum_y += curData; 6251 fz += curData * k; 6252 cur_data_pos ++; 6253 } 6254 fy += sum_y * j; 6255 sum_x += sum_y; 6256 } 6257 fx += sum_x * i; 6258 f += sum_x; 6259 } 6260 float coeff = 1.0 / (block_size * block_size * block_size); 6261 reg_params_pos[0] = (2 * fx / (block_size - 1) - f) * 6 * coeff / (block_size + 1); 6262 reg_params_pos[params_offset_b] = (2 * fy / (block_size - 1) - f) * 6 * coeff / (block_size + 1); 6263 reg_params_pos[params_offset_c] = (2 * fz / (block_size - 1) - f) * 6 * coeff / (block_size + 1); 6264 reg_params_pos[params_offset_d] = f * coeff - ((block_size - 1) * reg_params_pos[0] / 2 + (block_size - 1) * reg_params_pos[params_offset_b] / 2 + (block_size - 1) * reg_params_pos[params_offset_c] / 2); 6265 } 6266 reg_params_pos ++; 6267 } 6268 } 6269 } 6270 6271 //Compress coefficient arrays 6272 double precision_a, precision_b, precision_c, precision_d; 6273 float rel_param_err = 0.025; 6274 precision_a = rel_param_err * realPrecision / block_size; 6275 precision_b = rel_param_err * realPrecision / block_size; 6276 precision_c = rel_param_err * realPrecision / block_size; 6277 precision_d = rel_param_err * realPrecision; 6278 6279 if(exe_params->optQuantMode==1) 6280 { 6281 quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 6282 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 6283 updateQuantizationInfo(quantization_intervals); 6284 } 6285 else{ 6286 quantization_intervals = exe_params->intvCapacity; 6287 } 6288 6289 float mean = 0; 6290 if(use_mean){ 6291 // compute mean 6292 double sum = 0.0; 6293 size_t mean_count = 0; 6294 for(size_t i=0; i<num_elements; i++){ 6295 if(fabs(oriData[i] - dense_pos) < realPrecision){ 6296 sum += oriData[i]; 6297 mean_count ++; 6298 } 6299 } 6300 if(mean_count > 0) mean = sum / mean_count; 6301 } 6302 6303 double tmp_realPrecision = realPrecision; 6304 6305 // use two prediction buffers for higher performance 6306 float * unpredictable_data = result_unpredictable_data; 6307 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 6308 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 6309 size_t reg_count = 0; 6310 unsigned char * indicator_pos = indicator; 6311 6312 int intvCapacity = exe_params->intvCapacity; 6313 int intvRadius = exe_params->intvRadius; 6314 int use_reg = 0; 6315 float noise = realPrecision * 1.22; 6316 6317 reg_params_pos = reg_params; 6318 // compress the regression coefficients on the fly 6319 float last_coeffcients[4] = {0.0}; 6320 int coeff_intvCapacity_sz = 65536; 6321 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 6322 int * coeff_type[4]; 6323 int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); 6324 float * coeff_unpred_data[4]; 6325 float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float)); 6326 double precision[4]; 6327 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; 6328 for(int i=0; i<4; i++){ 6329 coeff_type[i] = coeff_result_type + i * num_blocks; 6330 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 6331 } 6332 int coeff_index = 0; 6333 unsigned int coeff_unpredictable_count[4] = {0}; 6334 6335 memset(pred_buffer, 0, (block_size+1)*(block_size+1)*(block_size+1)*sizeof(float)); 6336 int pred_buffer_block_size = block_size + 1; 6337 int strip_dim0_offset = pred_buffer_block_size * pred_buffer_block_size; 6338 int strip_dim1_offset = pred_buffer_block_size; 6339 6340 if(use_mean){ 6341 int intvCapacity_sz = intvCapacity - 2; 6342 type = result_type; 6343 for(size_t i=0; i<num_x; i++){ 6344 for(size_t j=0; j<num_y; j++){ 6345 for(size_t k=0; k<num_z; k++){ 6346 data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; 6347 // add 1 in x, y, z offset 6348 pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6349 block_data_pos_x = data_pos; 6350 for(int ii=0; ii<block_size; ii++){ 6351 block_data_pos_y = block_data_pos_x; 6352 for(int jj=0; jj<block_size; jj++){ 6353 block_data_pos_z = block_data_pos_y; 6354 for(int kk=0; kk<block_size; kk++){ 6355 *pred_buffer_pos = *block_data_pos_z; 6356 if(k*block_size + kk + 1< r3) block_data_pos_z ++; 6357 pred_buffer_pos ++; 6358 } 6359 // add 1 in z offset 6360 pred_buffer_pos ++; 6361 if(j*block_size + jj + 1< r2) block_data_pos_y += dim1_offset; 6362 } 6363 // add 1 in y offset 6364 pred_buffer_pos += pred_buffer_block_size; 6365 if(i*block_size + ii + 1< r1) block_data_pos_x += dim0_offset; 6366 } 6367 /*sampling and decide which predictor*/ 6368 { 6369 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 6370 float * cur_data_pos; 6371 float curData; 6372 float pred_reg, pred_sz; 6373 float err_sz = 0.0, err_reg = 0.0; 6374 int bmi = 0; 6375 for(int i=2; i<=block_size; i++){ 6376 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i; 6377 curData = *cur_data_pos; 6378 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6379 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 6380 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 6381 err_reg += fabs(pred_reg - curData); 6382 6383 bmi = block_size - i; 6384 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + bmi; 6385 curData = *cur_data_pos; 6386 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6387 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 6388 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 6389 err_reg += fabs(pred_reg - curData); 6390 6391 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i; 6392 curData = *cur_data_pos; 6393 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6394 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 6395 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 6396 err_reg += fabs(pred_reg - curData); 6397 6398 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi; 6399 curData = *cur_data_pos; 6400 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6401 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 6402 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 6403 err_reg += fabs(pred_reg - curData); 6404 } 6405 6406 use_reg = (err_reg < err_sz); 6407 } 6408 if(use_reg){ 6409 { 6410 /*predict coefficients in current block via previous reg_block*/ 6411 float cur_coeff; 6412 double diff, itvNum; 6413 for(int e=0; e<4; e++){ 6414 cur_coeff = reg_params_pos[e*num_blocks]; 6415 diff = cur_coeff - last_coeffcients[e]; 6416 itvNum = fabs(diff)/precision[e] + 1; 6417 if (itvNum < coeff_intvCapacity_sz){ 6418 if (diff < 0) itvNum = -itvNum; 6419 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 6420 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 6421 //ganrantee comporession error against the case of machine-epsilon 6422 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 6423 coeff_type[e][coeff_index] = 0; 6424 last_coeffcients[e] = cur_coeff; 6425 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 6426 } 6427 } 6428 else{ 6429 coeff_type[e][coeff_index] = 0; 6430 last_coeffcients[e] = cur_coeff; 6431 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 6432 } 6433 } 6434 coeff_index ++; 6435 } 6436 float curData; 6437 float pred; 6438 double itvNum; 6439 double diff; 6440 size_t index = 0; 6441 size_t block_unpredictable_count = 0; 6442 float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6443 for(size_t ii=0; ii<block_size; ii++){ 6444 for(size_t jj=0; jj<block_size; jj++){ 6445 for(size_t kk=0; kk<block_size; kk++){ 6446 curData = *cur_data_pos; 6447 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 6448 diff = curData - pred; 6449 itvNum = fabs(diff)/tmp_realPrecision + 1; 6450 if (itvNum < intvCapacity){ 6451 if (diff < 0) itvNum = -itvNum; 6452 type[index] = (int) (itvNum/2) + intvRadius; 6453 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6454 //ganrantee comporession error against the case of machine-epsilon 6455 if(fabs(curData - pred)>tmp_realPrecision){ 6456 type[index] = 0; 6457 pred = curData; 6458 unpredictable_data[block_unpredictable_count ++] = curData; 6459 } 6460 } 6461 else{ 6462 type[index] = 0; 6463 pred = curData; 6464 unpredictable_data[block_unpredictable_count ++] = curData; 6465 } 6466 index ++; 6467 cur_data_pos ++; 6468 } 6469 cur_data_pos ++; 6470 } 6471 cur_data_pos += pred_buffer_block_size; 6472 } 6473 6474 total_unpred += block_unpredictable_count; 6475 unpredictable_data += block_unpredictable_count; 6476 reg_count ++; 6477 } 6478 else{ 6479 // use SZ 6480 // SZ predication 6481 unpredictable_count = 0; 6482 float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6483 float curData; 6484 float pred3D; 6485 double itvNum, diff; 6486 size_t index = 0; 6487 for(size_t ii=0; ii<block_size; ii++){ 6488 for(size_t jj=0; jj<block_size; jj++){ 6489 for(size_t kk=0; kk<block_size; kk++){ 6490 6491 curData = *cur_data_pos; 6492 if(fabs(curData - mean) <= realPrecision){ 6493 type[index] = 1; 6494 *cur_data_pos = mean; 6495 } 6496 else 6497 { 6498 pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] 6499 - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6500 diff = curData - pred3D; 6501 itvNum = fabs(diff)/realPrecision + 1; 6502 if (itvNum < intvCapacity_sz){ 6503 if (diff < 0) itvNum = -itvNum; 6504 type[index] = (int) (itvNum/2) + intvRadius; 6505 *cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6506 //ganrantee comporession error against the case of machine-epsilon 6507 if(fabs(curData - *cur_data_pos)>tmp_realPrecision){ 6508 type[index] = 0; 6509 *cur_data_pos = curData; 6510 unpredictable_data[unpredictable_count ++] = curData; 6511 } 6512 } 6513 else{ 6514 type[index] = 0; 6515 *cur_data_pos = curData; 6516 unpredictable_data[unpredictable_count ++] = curData; 6517 } 6518 } 6519 index ++; 6520 cur_data_pos ++; 6521 } 6522 cur_data_pos ++; 6523 } 6524 cur_data_pos += pred_buffer_block_size; 6525 } 6526 total_unpred += unpredictable_count; 6527 unpredictable_data += unpredictable_count; 6528 // change indicator 6529 indicator_pos[k] = 1; 6530 }// end SZ 6531 reg_params_pos ++; 6532 type += block_size * block_size * block_size; 6533 } // end k 6534 indicator_pos += num_z; 6535 }// end j 6536 }// end i 6537 } 6538 else{ 6539 int intvCapacity_sz = intvCapacity - 2; 6540 type = result_type; 6541 for(size_t i=0; i<num_x; i++){ 6542 for(size_t j=0; j<num_y; j++){ 6543 for(size_t k=0; k<num_z; k++){ 6544 data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; 6545 // add 1 in x, y, z offset 6546 pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6547 block_data_pos_x = data_pos; 6548 for(int ii=0; ii<block_size; ii++){ 6549 block_data_pos_y = block_data_pos_x; 6550 for(int jj=0; jj<block_size; jj++){ 6551 block_data_pos_z = block_data_pos_y; 6552 for(int kk=0; kk<block_size; kk++){ 6553 *pred_buffer_pos = *block_data_pos_z; 6554 if(k*block_size + kk < r3) block_data_pos_z ++; 6555 pred_buffer_pos ++; 6556 } 6557 // add 1 in z offset 6558 pred_buffer_pos ++; 6559 if(j*block_size + jj < r2) block_data_pos_y += dim1_offset; 6560 } 6561 // add 1 in y offset 6562 pred_buffer_pos += pred_buffer_block_size; 6563 if(i*block_size + ii < r1) block_data_pos_x += dim0_offset; 6564 } 6565 /*sampling*/ 6566 { 6567 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 6568 float * cur_data_pos; 6569 float curData; 6570 float pred_reg, pred_sz; 6571 float err_sz = 0.0, err_reg = 0.0; 6572 int bmi; 6573 for(int i=2; i<=block_size; i++){ 6574 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i; 6575 curData = *cur_data_pos; 6576 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6577 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 6578 err_sz += fabs(pred_sz - curData) + noise; 6579 err_reg += fabs(pred_reg - curData); 6580 6581 bmi = block_size - i; 6582 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + bmi; 6583 curData = *cur_data_pos; 6584 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6585 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 6586 err_sz += fabs(pred_sz - curData) + noise; 6587 err_reg += fabs(pred_reg - curData); 6588 6589 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i; 6590 curData = *cur_data_pos; 6591 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6592 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 6593 err_sz += fabs(pred_sz - curData) + noise; 6594 err_reg += fabs(pred_reg - curData); 6595 6596 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi; 6597 curData = *cur_data_pos; 6598 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6599 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 6600 err_sz += fabs(pred_sz - curData) + noise; 6601 err_reg += fabs(pred_reg - curData); 6602 } 6603 6604 use_reg = (err_reg < err_sz); 6605 6606 } 6607 if(use_reg) 6608 { 6609 { 6610 /*predict coefficients in current block via previous reg_block*/ 6611 float cur_coeff; 6612 double diff, itvNum; 6613 for(int e=0; e<4; e++){ 6614 cur_coeff = reg_params_pos[e*num_blocks]; 6615 diff = cur_coeff - last_coeffcients[e]; 6616 itvNum = fabs(diff)/precision[e] + 1; 6617 if (itvNum < coeff_intvCapacity_sz){ 6618 if (diff < 0) itvNum = -itvNum; 6619 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 6620 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 6621 //ganrantee comporession error against the case of machine-epsilon 6622 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 6623 coeff_type[e][coeff_index] = 0; 6624 last_coeffcients[e] = cur_coeff; 6625 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 6626 } 6627 } 6628 else{ 6629 coeff_type[e][coeff_index] = 0; 6630 last_coeffcients[e] = cur_coeff; 6631 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 6632 } 6633 } 6634 coeff_index ++; 6635 } 6636 float curData; 6637 float pred; 6638 double itvNum; 6639 double diff; 6640 size_t index = 0; 6641 size_t block_unpredictable_count = 0; 6642 float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6643 for(size_t ii=0; ii<block_size; ii++){ 6644 for(size_t jj=0; jj<block_size; jj++){ 6645 for(size_t kk=0; kk<block_size; kk++){ 6646 curData = *cur_data_pos; 6647 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 6648 diff = curData - pred; 6649 itvNum = fabs(diff)/tmp_realPrecision + 1; 6650 if (itvNum < intvCapacity){ 6651 if (diff < 0) itvNum = -itvNum; 6652 type[index] = (int) (itvNum/2) + intvRadius; 6653 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6654 //ganrantee comporession error against the case of machine-epsilon 6655 if(fabs(curData - pred)>tmp_realPrecision){ 6656 type[index] = 0; 6657 pred = curData; 6658 unpredictable_data[block_unpredictable_count ++] = curData; 6659 } 6660 } 6661 else{ 6662 type[index] = 0; 6663 pred = curData; 6664 unpredictable_data[block_unpredictable_count ++] = curData; 6665 } 6666 index ++; 6667 cur_data_pos ++; 6668 } 6669 cur_data_pos ++; 6670 } 6671 cur_data_pos += pred_buffer_block_size; 6672 } 6673 total_unpred += block_unpredictable_count; 6674 unpredictable_data += block_unpredictable_count; 6675 reg_count ++; 6676 } 6677 else{ 6678 // use SZ 6679 // SZ predication 6680 unpredictable_count = 0; 6681 float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6682 float curData; 6683 float pred3D; 6684 double itvNum, diff; 6685 size_t index = 0; 6686 for(size_t ii=0; ii<block_size; ii++){ 6687 for(size_t jj=0; jj<block_size; jj++){ 6688 for(size_t kk=0; kk<block_size; kk++){ 6689 curData = *cur_data_pos; 6690 pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] 6691 - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6692 diff = curData - pred3D; 6693 itvNum = fabs(diff)/realPrecision + 1; 6694 if (itvNum < intvCapacity_sz){ 6695 if (diff < 0) itvNum = -itvNum; 6696 type[index] = (int) (itvNum/2) + intvRadius; 6697 *cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6698 //ganrantee comporession error against the case of machine-epsilon 6699 if(fabs(curData - *cur_data_pos)>tmp_realPrecision){ 6700 type[index] = 0; 6701 *cur_data_pos = curData; 6702 unpredictable_data[unpredictable_count ++] = curData; 6703 } 6704 } 6705 else{ 6706 type[index] = 0; 6707 *cur_data_pos = curData; 6708 unpredictable_data[unpredictable_count ++] = curData; 6709 } 6710 index ++; 6711 cur_data_pos ++; 6712 } 6713 cur_data_pos ++; 6714 } 6715 cur_data_pos += pred_buffer_block_size; 6716 } 6717 total_unpred += unpredictable_count; 6718 unpredictable_data += unpredictable_count; 6719 // change indicator 6720 indicator_pos[k] = 1; 6721 }// end SZ 6722 reg_params_pos ++; 6723 type += block_size * block_size * block_size; 6724 } 6725 indicator_pos += num_z; 6726 } 6727 } 6728 } 6729 free(pred_buffer); 6730 int stateNum = 2*quantization_intervals; 6731 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 6732 6733 size_t nodeCount = 0; 6734 init(huffmanTree, result_type, num_blocks*max_num_block_elements); 6735 size_t i = 0; 6736 for (i = 0; i < huffmanTree->stateNum; i++) 6737 if (huffmanTree->code[i]) nodeCount++; 6738 nodeCount = nodeCount*2-1; 6739 6740 unsigned char *treeBytes; 6741 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 6742 6743 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 6744 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 6745 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); 6746 unsigned char * result_pos = result; 6747 initRandomAccessBytes(result_pos); 6748 6749 result_pos += meta_data_offset; 6750 6751 sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 6752 result_pos += exe_params->SZ_SIZE_TYPE; 6753 6754 intToBytes_bigEndian(result_pos, block_size); 6755 result_pos += sizeof(int); 6756 doubleToBytes(result_pos, realPrecision); 6757 result_pos += sizeof(double); 6758 intToBytes_bigEndian(result_pos, quantization_intervals); 6759 result_pos += sizeof(int); 6760 intToBytes_bigEndian(result_pos, treeByteSize); 6761 result_pos += sizeof(int); 6762 intToBytes_bigEndian(result_pos, nodeCount); 6763 result_pos += sizeof(int); 6764 memcpy(result_pos, treeBytes, treeByteSize); 6765 result_pos += treeByteSize; 6766 free(treeBytes); 6767 6768 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 6769 result_pos += sizeof(unsigned char); 6770 memcpy(result_pos, &mean, sizeof(float)); 6771 result_pos += sizeof(float); 6772 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 6773 result_pos += indicator_size; 6774 6775 //convert the lead/mid/resi to byte stream 6776 if(reg_count > 0){ 6777 for(int e=0; e<4; e++){ 6778 int stateNum = 2*coeff_intvCapacity_sz; 6779 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 6780 size_t nodeCount = 0; 6781 init(huffmanTree, coeff_type[e], reg_count); 6782 size_t i = 0; 6783 for (i = 0; i < huffmanTree->stateNum; i++) 6784 if (huffmanTree->code[i]) nodeCount++; 6785 nodeCount = nodeCount*2-1; 6786 unsigned char *treeBytes; 6787 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 6788 doubleToBytes(result_pos, precision[e]); 6789 result_pos += sizeof(double); 6790 intToBytes_bigEndian(result_pos, coeff_intvRadius); 6791 result_pos += sizeof(int); 6792 intToBytes_bigEndian(result_pos, treeByteSize); 6793 result_pos += sizeof(int); 6794 intToBytes_bigEndian(result_pos, nodeCount); 6795 result_pos += sizeof(int); 6796 memcpy(result_pos, treeBytes, treeByteSize); 6797 result_pos += treeByteSize; 6798 free(treeBytes); 6799 size_t typeArray_size = 0; 6800 encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); 6801 sizeToBytes(result_pos, typeArray_size); 6802 result_pos += sizeof(size_t) + typeArray_size; 6803 intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); 6804 result_pos += sizeof(int); 6805 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); 6806 result_pos += coeff_unpredictable_count[e]*sizeof(float); 6807 SZ_ReleaseHuffman(huffmanTree); 6808 } 6809 } 6810 free(coeff_result_type); 6811 free(coeff_unpredictable_data); 6812 6813 //record the number of unpredictable data and also store them 6814 memcpy(result_pos, &total_unpred, sizeof(size_t)); 6815 result_pos += sizeof(size_t); 6816 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); 6817 result_pos += total_unpred * sizeof(float); 6818 size_t typeArray_size = 0; 6819 encode(huffmanTree, result_type, num_blocks*max_num_block_elements, result_pos, &typeArray_size); 6820 result_pos += typeArray_size; 6821 size_t totalEncodeSize = result_pos - result; 6822 free(indicator); 6823 free(result_unpredictable_data); 6824 free(result_type); 6825 free(reg_params); 6826 6827 6828 SZ_ReleaseHuffman(huffmanTree); 6829 *comp_size = totalEncodeSize; 6830 return result; 6831 }
Note: See TracChangeset
for help on using the changeset viewer.