Changeset 9ee2ce3 for thirdparty/SZ/sz/src/sz_double.c
- Timestamp:
- 09/28/18 16:32:55 (6 years ago)
- Branches:
- master, pympi
- Children:
- e6aa0eb
- Parents:
- abca157
- git-author:
- Hal Finkel <hfinkel@…> (09/28/18 16:32:55)
- git-committer:
- Hal Finkel <hfinkel@…> (09/28/18 16:32:55)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
thirdparty/SZ/sz/src/sz_double.c
r2c47b73 r9ee2ce3 26 26 #include "rw.h" 27 27 #include "sz_double_ts.h" 28 #include "utility.h" 28 29 29 30 unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize) … … 329 330 pred = last3CmprsData[0]; 330 331 predAbsErr = fabs(curData - pred); 331 if(predAbsErr< =checkRadius)332 if(predAbsErr<checkRadius) 332 333 { 333 334 state = (predAbsErr/realPrecision+1)/2; … … 1517 1518 if(errBoundMode>=PW_REL) 1518 1519 { 1519 //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, realPrecision, r1, outSize, min, max);1520 SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);1520 SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max); 1521 //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); 1521 1522 } 1522 1523 else … … 1563 1564 return SZ_NSCS; 1564 1565 } 1565 } 1566 } 1566 1567 1567 1568 int status = SZ_SCES; … … 1601 1602 if(confparams_cpr->errorBoundMode>=PW_REL) 1602 1603 { 1603 //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(&tmpByteData, oriData, realPrecision, r1, &tmpOutSize, min, max); 1604 SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, 1605 valueRangeSize, medianValue, &tmpOutSize); 1604 SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max); 1605 //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize); 1606 1606 } 1607 1607 else 1608 1608 #ifdef HAVE_TIMECMPR 1609 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 1609 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 1610 1610 multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1611 1611 else … … 1617 1617 { 1618 1618 if(confparams_cpr->errorBoundMode>=PW_REL) 1619 SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr (&tmpByteData, oriData, realPrecision, r2, r1, &tmpOutSize, min, max);1619 SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max); 1620 1620 else 1621 1621 #ifdef HAVE_TIMECMPR … … 1624 1624 else 1625 1625 #endif 1626 SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1626 { 1627 if(sz_with_regression == SZ_NO_REGRESSION) 1628 SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1629 else 1630 tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize); 1631 } 1627 1632 } 1628 1633 else … … 1630 1635 { 1631 1636 if(confparams_cpr->errorBoundMode>=PW_REL) 1632 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr (&tmpByteData, oriData, realPrecision, r3, r2, r1, &tmpOutSize, min, max);1637 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max); 1633 1638 else 1634 1639 #ifdef HAVE_TIMECMPR … … 1637 1642 else 1638 1643 #endif 1639 SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1644 { 1645 if(sz_with_regression == SZ_NO_REGRESSION) 1646 SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1647 else 1648 tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); 1649 } 1650 1651 1640 1652 } 1641 1653 else … … 1643 1655 { 1644 1656 if(confparams_cpr->errorBoundMode>=PW_REL) 1645 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr (&tmpByteData, oriData, realPrecision, r4*r3, r2, r1, &tmpOutSize, min, max);1657 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max); 1646 1658 else 1647 1659 #ifdef HAVE_TIMECMPR … … 1649 1661 multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1650 1662 else 1651 #endif 1652 SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1663 #endif 1664 { 1665 if(sz_with_regression == SZ_NO_REGRESSION) 1666 SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1667 else 1668 tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize); 1669 } 1670 1653 1671 } 1654 1672 else … … 1666 1684 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1667 1685 { 1668 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1686 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1669 1687 free(tmpByteData); 1670 1688 } … … 3122 3140 { 3123 3141 radiusIndex = confparams_cpr->maxRangeRadius - 1; 3124 //printf("radiusIndex=%d\n", radiusIndex);3125 3142 } 3126 3143 intervals[radiusIndex]++; 3127 // printf("TEST: %ld, i: %ld\tj: %ld\tk: %ld\n", data_pos - oriData);3128 // fflush(stdout);3129 3144 offset_count += confparams_cpr->sampleDistance; 3130 3145 if(offset_count >= r3){ … … 3142 3157 else data_pos += confparams_cpr->sampleDistance; 3143 3158 } 3144 // printf("sample_count: %ld\n", sample_count);3145 // fflush(stdout);3146 // if(*max_freq < 0.15) *max_freq *= 2;3147 3159 //compute the appropriate number 3148 3160 size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; … … 3162 3174 powerOf2 = 32; 3163 3175 free(intervals); 3164 //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);3165 3176 return powerOf2; 3166 3177 } … … 3173 3184 size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); 3174 3185 memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); 3175 size_t totalSampleSize = 0; //(r1-1)*(r2-1)/confparams_cpr->sampleDistance;3186 size_t totalSampleSize = 0; 3176 3187 3177 3188 size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset … … 3227 3238 size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); 3228 3239 memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); 3229 size_t totalSampleSize = 0; //dataLength/confparams_cpr->sampleDistance;3240 size_t totalSampleSize = 0; 3230 3241 3231 3242 double * data_pos = oriData + 2; 3232 3243 while(data_pos - oriData < dataLength){ 3233 3244 totalSampleSize++; 3234 //pred_value = 2*data_pos[-1] - data_pos[-2];3235 3245 pred_value = data_pos[-1]; 3236 3246 pred_err = fabs(pred_value - *data_pos); … … 3261 3271 3262 3272 free(intervals); 3263 //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);3264 3273 return powerOf2; 3265 3274 } 3275 3276 /*The above code is for sz 1.4.13; the following code is for sz 2.0*/ 3277 unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq) 3278 { 3279 double mean = 0.0; 3280 size_t len = r1 * r2; 3281 size_t mean_distance = (int) (sqrt(len)); 3282 3283 double * data_pos = oriData; 3284 size_t mean_count = 0; 3285 while(data_pos - oriData < len){ 3286 mean += *data_pos; 3287 mean_count ++; 3288 data_pos += mean_distance; 3289 } 3290 if(mean_count > 0) mean /= mean_count; 3291 size_t range = 8192; 3292 size_t radius = 4096; 3293 size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); 3294 memset(freq_intervals, 0, range*sizeof(size_t)); 3295 3296 unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; 3297 int sampleDistance = confparams_cpr->sampleDistance; 3298 double predThreshold = confparams_cpr->predThreshold; 3299 3300 size_t i; 3301 size_t radiusIndex; 3302 double pred_value = 0, pred_err; 3303 size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); 3304 memset(intervals, 0, maxRangeRadius*sizeof(size_t)); 3305 3306 double mean_diff; 3307 ptrdiff_t freq_index; 3308 size_t freq_count = 0; 3309 size_t n1_count = 1; 3310 size_t offset_count = sampleDistance - 1; 3311 size_t offset_count_2 = 0; 3312 size_t sample_count = 0; 3313 data_pos = oriData + r2 + offset_count; 3314 while(data_pos - oriData < len){ 3315 pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; 3316 pred_err = fabs(pred_value - *data_pos); 3317 if(pred_err < realPrecision) freq_count ++; 3318 radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); 3319 if(radiusIndex>=maxRangeRadius) 3320 radiusIndex = maxRangeRadius - 1; 3321 intervals[radiusIndex]++; 3322 3323 mean_diff = *data_pos - mean; 3324 if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; 3325 else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; 3326 if(freq_index <= 0){ 3327 freq_intervals[0] ++; 3328 } 3329 else if(freq_index >= range){ 3330 freq_intervals[range - 1] ++; 3331 } 3332 else{ 3333 freq_intervals[freq_index] ++; 3334 } 3335 offset_count += sampleDistance; 3336 if(offset_count >= r2){ 3337 n1_count ++; 3338 offset_count_2 = n1_count % sampleDistance; 3339 data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); 3340 offset_count = (sampleDistance - offset_count_2); 3341 if(offset_count == 0) offset_count ++; 3342 } 3343 else data_pos += sampleDistance; 3344 sample_count ++; 3345 } 3346 *max_freq = freq_count * 1.0/ sample_count; 3347 3348 //compute the appropriate number 3349 size_t targetCount = sample_count*predThreshold; 3350 size_t sum = 0; 3351 for(i=0;i<maxRangeRadius;i++) 3352 { 3353 sum += intervals[i]; 3354 if(sum>targetCount) 3355 break; 3356 } 3357 if(i>=maxRangeRadius) 3358 i = maxRangeRadius-1; 3359 unsigned int accIntervals = 2*(i+1); 3360 unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); 3361 3362 if(powerOf2<32) 3363 powerOf2 = 32; 3364 3365 // collect frequency 3366 size_t max_sum = 0; 3367 size_t max_index = 0; 3368 size_t tmp_sum; 3369 size_t * freq_pos = freq_intervals + 1; 3370 for(size_t i=1; i<range-2; i++){ 3371 tmp_sum = freq_pos[0] + freq_pos[1]; 3372 if(tmp_sum > max_sum){ 3373 max_sum = tmp_sum; 3374 max_index = i; 3375 } 3376 freq_pos ++; 3377 } 3378 *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); 3379 *mean_freq = max_sum * 1.0 / sample_count; 3380 3381 free(freq_intervals); 3382 free(intervals); 3383 return powerOf2; 3384 } 3385 3386 unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq) 3387 { 3388 double mean = 0.0; 3389 size_t len = r1 * r2 * r3; 3390 size_t mean_distance = (int) (sqrt(len)); 3391 double * data_pos = oriData; 3392 size_t offset_count = 0; 3393 size_t offset_count_2 = 0; 3394 size_t mean_count = 0; 3395 while(data_pos - oriData < len){ 3396 mean += *data_pos; 3397 mean_count ++; 3398 data_pos += mean_distance; 3399 offset_count += mean_distance; 3400 offset_count_2 += mean_distance; 3401 if(offset_count >= r3){ 3402 offset_count = 0; 3403 data_pos -= 1; 3404 } 3405 if(offset_count_2 >= r2 * r3){ 3406 offset_count_2 = 0; 3407 data_pos -= 1; 3408 } 3409 } 3410 if(mean_count > 0) mean /= mean_count; 3411 size_t range = 8192; 3412 size_t radius = 4096; 3413 size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); 3414 memset(freq_intervals, 0, range*sizeof(size_t)); 3415 3416 unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; 3417 int sampleDistance = confparams_cpr->sampleDistance; 3418 double predThreshold = confparams_cpr->predThreshold; 3419 3420 size_t i; 3421 size_t radiusIndex; 3422 size_t r23=r2*r3; 3423 double pred_value = 0, pred_err; 3424 size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); 3425 memset(intervals, 0, maxRangeRadius*sizeof(size_t)); 3426 3427 double mean_diff; 3428 ptrdiff_t freq_index; 3429 size_t freq_count = 0; 3430 size_t sample_count = 0; 3431 3432 offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset 3433 data_pos = oriData + r23 + r3 + offset_count; 3434 size_t n1_count = 1, n2_count = 1; // count i,j sum 3435 3436 while(data_pos - oriData < len){ 3437 3438 pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; 3439 pred_err = fabs(pred_value - *data_pos); 3440 if(pred_err < realPrecision) freq_count ++; 3441 radiusIndex = (pred_err/realPrecision+1)/2; 3442 if(radiusIndex>=maxRangeRadius) 3443 { 3444 radiusIndex = maxRangeRadius - 1; 3445 } 3446 intervals[radiusIndex]++; 3447 3448 mean_diff = *data_pos - mean; 3449 if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; 3450 else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; 3451 if(freq_index <= 0){ 3452 freq_intervals[0] ++; 3453 } 3454 else if(freq_index >= range){ 3455 freq_intervals[range - 1] ++; 3456 } 3457 else{ 3458 freq_intervals[freq_index] ++; 3459 } 3460 offset_count += sampleDistance; 3461 if(offset_count >= r3){ 3462 n2_count ++; 3463 if(n2_count == r2){ 3464 n1_count ++; 3465 n2_count = 1; 3466 data_pos += r3; 3467 } 3468 offset_count_2 = (n1_count + n2_count) % sampleDistance; 3469 data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); 3470 offset_count = (sampleDistance - offset_count_2); 3471 if(offset_count == 0) offset_count ++; 3472 } 3473 else data_pos += sampleDistance; 3474 sample_count ++; 3475 } 3476 *max_freq = freq_count * 1.0/ sample_count; 3477 3478 //compute the appropriate number 3479 size_t targetCount = sample_count*predThreshold; 3480 size_t sum = 0; 3481 for(i=0;i<maxRangeRadius;i++) 3482 { 3483 sum += intervals[i]; 3484 if(sum>targetCount) 3485 break; 3486 } 3487 if(i>=maxRangeRadius) 3488 i = maxRangeRadius-1; 3489 unsigned int accIntervals = 2*(i+1); 3490 unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); 3491 3492 if(powerOf2<32) 3493 powerOf2 = 32; 3494 // collect frequency 3495 size_t max_sum = 0; 3496 size_t max_index = 0; 3497 size_t tmp_sum; 3498 size_t * freq_pos = freq_intervals + 1; 3499 for(size_t i=1; i<range-2; i++){ 3500 tmp_sum = freq_pos[0] + freq_pos[1]; 3501 if(tmp_sum > max_sum){ 3502 max_sum = tmp_sum; 3503 max_index = i; 3504 } 3505 freq_pos ++; 3506 } 3507 *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); 3508 *mean_freq = max_sum * 1.0 / sample_count; 3509 3510 free(freq_intervals); 3511 free(intervals); 3512 return powerOf2; 3513 } 3514 3515 #define MIN(a, b) a<b? a : b 3516 unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ 3517 3518 unsigned int quantization_intervals; 3519 double sz_sample_correct_freq = -1;//0.5; //-1 3520 double dense_pos; 3521 double mean_flush_freq; 3522 unsigned char use_mean = 0; 3523 3524 if(exe_params->optQuantMode==1) 3525 { 3526 quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 3527 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 3528 updateQuantizationInfo(quantization_intervals); 3529 } 3530 else{ 3531 quantization_intervals = exe_params->intvCapacity; 3532 } 3533 3534 // calculate block dims 3535 size_t num_x, num_y; 3536 size_t block_size = 16; 3537 3538 SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 3539 SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 3540 3541 size_t split_index_x, split_index_y; 3542 size_t early_blockcount_x, early_blockcount_y; 3543 size_t late_blockcount_x, late_blockcount_y; 3544 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 3545 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 3546 3547 size_t max_num_block_elements = early_blockcount_x * early_blockcount_y; 3548 size_t num_blocks = num_x * num_y; 3549 size_t num_elements = r1 * r2; 3550 3551 size_t dim0_offset = r2; 3552 3553 int * result_type = (int *) malloc(num_elements * sizeof(int)); 3554 size_t unpred_data_max_size = max_num_block_elements; 3555 double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); 3556 size_t total_unpred = 0; 3557 size_t unpredictable_count; 3558 double * data_pos = oriData; 3559 int * type = result_type; 3560 size_t offset_x, offset_y; 3561 size_t current_blockcount_x, current_blockcount_y; 3562 3563 double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double)); 3564 double * reg_params_pos = reg_params; 3565 // move regression part out 3566 size_t params_offset_b = num_blocks; 3567 size_t params_offset_c = 2*num_blocks; 3568 for(size_t i=0; i<num_x; i++){ 3569 for(size_t j=0; j<num_y; j++){ 3570 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3571 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3572 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3573 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3574 3575 data_pos = oriData + offset_x * dim0_offset + offset_y; 3576 3577 { 3578 double * cur_data_pos = data_pos; 3579 double fx = 0.0; 3580 double fy = 0.0; 3581 double f = 0; 3582 double sum_x; 3583 double curData; 3584 for(size_t i=0; i<current_blockcount_x; i++){ 3585 sum_x = 0; 3586 for(size_t j=0; j<current_blockcount_y; j++){ 3587 curData = *cur_data_pos; 3588 sum_x += curData; 3589 fy += curData * j; 3590 cur_data_pos ++; 3591 } 3592 fx += sum_x * i; 3593 f += sum_x; 3594 cur_data_pos += dim0_offset - current_blockcount_y; 3595 } 3596 double coeff = 1.0 / (current_blockcount_x * current_blockcount_y); 3597 reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); 3598 reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); 3599 reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2); 3600 } 3601 3602 reg_params_pos ++; 3603 } 3604 } 3605 3606 //Compress coefficient arrays 3607 double precision_a, precision_b, precision_c; 3608 double rel_param_err = 0.15/3; 3609 precision_a = rel_param_err * realPrecision / late_blockcount_x; 3610 precision_b = rel_param_err * realPrecision / late_blockcount_y; 3611 precision_c = rel_param_err * realPrecision; 3612 3613 double mean = 0; 3614 use_mean = 0; 3615 if(use_mean){ 3616 // compute mean 3617 double sum = 0.0; 3618 size_t mean_count = 0; 3619 for(size_t i=0; i<num_elements; i++){ 3620 if(fabs(oriData[i] - dense_pos) < realPrecision){ 3621 sum += oriData[i]; 3622 mean_count ++; 3623 } 3624 } 3625 if(mean_count > 0) mean = sum / mean_count; 3626 } 3627 3628 3629 double tmp_realPrecision = realPrecision; 3630 3631 // use two prediction buffers for higher performance 3632 double * unpredictable_data = result_unpredictable_data; 3633 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 3634 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 3635 size_t reg_count = 0; 3636 size_t strip_dim_0 = early_blockcount_x + 1; 3637 size_t strip_dim_1 = r2 + 1; 3638 size_t strip_dim0_offset = strip_dim_1; 3639 unsigned char * indicator_pos = indicator; 3640 size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double); 3641 double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size); 3642 memset(prediction_buffer_1, 0, prediction_buffer_size); 3643 double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size); 3644 memset(prediction_buffer_2, 0, prediction_buffer_size); 3645 double * cur_pb_buf = prediction_buffer_1; 3646 double * next_pb_buf = prediction_buffer_2; 3647 double * cur_pb_buf_pos; 3648 double * next_pb_buf_pos; 3649 int intvCapacity = exe_params->intvCapacity; 3650 int intvRadius = exe_params->intvRadius; 3651 int use_reg = 0; 3652 3653 reg_params_pos = reg_params; 3654 // compress the regression coefficients on the fly 3655 double last_coeffcients[3] = {0.0}; 3656 int coeff_intvCapacity_sz = 65536; 3657 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 3658 int * coeff_type[3]; 3659 int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); 3660 double * coeff_unpred_data[3]; 3661 double * coeff_unpredictable_data = (double *) malloc(num_blocks*3*sizeof(double)); 3662 double precision[3]; 3663 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c; 3664 for(int i=0; i<3; i++){ 3665 coeff_type[i] = coeff_result_type + i * num_blocks; 3666 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 3667 } 3668 int coeff_index = 0; 3669 unsigned int coeff_unpredictable_count[3] = {0}; 3670 if(use_mean){ 3671 type = result_type; 3672 int intvCapacity_sz = intvCapacity - 2; 3673 for(size_t i=0; i<num_x; i++){ 3674 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3675 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3676 data_pos = oriData + offset_x * dim0_offset; 3677 3678 cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; 3679 next_pb_buf_pos = next_pb_buf + 1; 3680 double * pb_pos = cur_pb_buf_pos; 3681 double * next_pb_pos = next_pb_buf_pos; 3682 3683 for(size_t j=0; j<num_y; j++){ 3684 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3685 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3686 3687 /*sampling: decide which predictor to use (regression or lorenzo)*/ 3688 { 3689 double * cur_data_pos; 3690 double curData; 3691 double pred_reg, pred_sz; 3692 double err_sz = 0.0, err_reg = 0.0; 3693 // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] 3694 // [1, 9] [3, 7] [7, 3] [9, 1] 3695 int count = 0; 3696 for(int i=1; i<current_blockcount_x; i+=2){ 3697 cur_data_pos = data_pos + i * dim0_offset + i; 3698 curData = *cur_data_pos; 3699 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 3700 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; 3701 3702 err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); 3703 3704 err_reg += fabs(pred_reg - curData); 3705 3706 cur_data_pos = data_pos + i * dim0_offset + (block_size - i); 3707 curData = *cur_data_pos; 3708 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 3709 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; 3710 err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); 3711 3712 err_reg += fabs(pred_reg - curData); 3713 3714 count += 2; 3715 } 3716 3717 use_reg = (err_reg < err_sz); 3718 } 3719 if(use_reg) 3720 { 3721 { 3722 /*predict coefficients in current block via previous reg_block*/ 3723 double cur_coeff; 3724 double diff, itvNum; 3725 for(int e=0; e<3; e++){ 3726 cur_coeff = reg_params_pos[e*num_blocks]; 3727 diff = cur_coeff - last_coeffcients[e]; 3728 itvNum = fabs(diff)/precision[e] + 1; 3729 if (itvNum < coeff_intvCapacity_sz){ 3730 if (diff < 0) itvNum = -itvNum; 3731 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 3732 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 3733 //ganrantee comporession error against the case of machine-epsilon 3734 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 3735 coeff_type[e][coeff_index] = 0; 3736 last_coeffcients[e] = cur_coeff; 3737 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 3738 } 3739 } 3740 else{ 3741 coeff_type[e][coeff_index] = 0; 3742 last_coeffcients[e] = cur_coeff; 3743 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 3744 } 3745 } 3746 coeff_index ++; 3747 } 3748 double curData; 3749 double pred; 3750 double itvNum; 3751 double diff; 3752 size_t index = 0; 3753 size_t block_unpredictable_count = 0; 3754 double * cur_data_pos = data_pos; 3755 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 3756 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 3757 curData = *cur_data_pos; 3758 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 3759 diff = curData - pred; 3760 itvNum = fabs(diff)/realPrecision + 1; 3761 if (itvNum < intvCapacity){ 3762 if (diff < 0) itvNum = -itvNum; 3763 type[index] = (int) (itvNum/2) + intvRadius; 3764 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 3765 //ganrantee comporession error against the case of machine-epsilon 3766 if(fabs(curData - pred)>realPrecision){ 3767 type[index] = 0; 3768 pred = curData; 3769 unpredictable_data[block_unpredictable_count ++] = curData; 3770 } 3771 } 3772 else{ 3773 type[index] = 0; 3774 pred = curData; 3775 unpredictable_data[block_unpredictable_count ++] = curData; 3776 } 3777 index ++; 3778 cur_data_pos ++; 3779 } 3780 /*dealing with the last jj (boundary)*/ 3781 { 3782 size_t jj = current_blockcount_y - 1; 3783 curData = *cur_data_pos; 3784 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 3785 diff = curData - pred; 3786 itvNum = fabs(diff)/realPrecision + 1; 3787 if (itvNum < intvCapacity){ 3788 if (diff < 0) itvNum = -itvNum; 3789 type[index] = (int) (itvNum/2) + intvRadius; 3790 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 3791 //ganrantee comporession error against the case of machine-epsilon 3792 if(fabs(curData - pred)>realPrecision){ 3793 type[index] = 0; 3794 pred = curData; 3795 unpredictable_data[block_unpredictable_count ++] = curData; 3796 } 3797 } 3798 else{ 3799 type[index] = 0; 3800 pred = curData; 3801 unpredictable_data[block_unpredictable_count ++] = curData; 3802 } 3803 3804 // assign value to block surfaces 3805 pb_pos[ii * strip_dim0_offset + jj] = pred; 3806 index ++; 3807 cur_data_pos ++; 3808 } 3809 cur_data_pos += dim0_offset - current_blockcount_y; 3810 } 3811 /*dealing with the last ii (boundary)*/ 3812 { 3813 size_t ii = current_blockcount_x - 1; 3814 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 3815 curData = *cur_data_pos; 3816 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 3817 diff = curData - pred; 3818 itvNum = fabs(diff)/realPrecision + 1; 3819 if (itvNum < intvCapacity){ 3820 if (diff < 0) itvNum = -itvNum; 3821 type[index] = (int) (itvNum/2) + intvRadius; 3822 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 3823 //ganrantee comporession error against the case of machine-epsilon 3824 if(fabs(curData - pred)>realPrecision){ 3825 type[index] = 0; 3826 pred = curData; 3827 unpredictable_data[block_unpredictable_count ++] = curData; 3828 } 3829 } 3830 else{ 3831 type[index] = 0; 3832 pred = curData; 3833 unpredictable_data[block_unpredictable_count ++] = curData; 3834 } 3835 // assign value to next prediction buffer 3836 next_pb_pos[jj] = pred; 3837 index ++; 3838 cur_data_pos ++; 3839 } 3840 /*dealing with the last jj (boundary)*/ 3841 { 3842 size_t jj = current_blockcount_y - 1; 3843 curData = *cur_data_pos; 3844 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 3845 diff = curData - pred; 3846 itvNum = fabs(diff)/realPrecision + 1; 3847 if (itvNum < intvCapacity){ 3848 if (diff < 0) itvNum = -itvNum; 3849 type[index] = (int) (itvNum/2) + intvRadius; 3850 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 3851 //ganrantee comporession error against the case of machine-epsilon 3852 if(fabs(curData - pred)>realPrecision){ 3853 type[index] = 0; 3854 pred = curData; 3855 unpredictable_data[block_unpredictable_count ++] = curData; 3856 } 3857 } 3858 else{ 3859 type[index] = 0; 3860 pred = curData; 3861 unpredictable_data[block_unpredictable_count ++] = curData; 3862 } 3863 3864 // assign value to block surfaces 3865 pb_pos[ii * strip_dim0_offset + jj] = pred; 3866 // assign value to next prediction buffer 3867 next_pb_pos[jj] = pred; 3868 3869 index ++; 3870 cur_data_pos ++; 3871 } 3872 } // end ii == -1 3873 unpredictable_count = block_unpredictable_count; 3874 total_unpred += unpredictable_count; 3875 unpredictable_data += unpredictable_count; 3876 reg_count ++; 3877 }// end use_reg 3878 else{ 3879 // use SZ 3880 // SZ predication 3881 unpredictable_count = 0; 3882 double * cur_pb_pos = pb_pos; 3883 double * cur_data_pos = data_pos; 3884 double curData; 3885 double pred2D; 3886 double itvNum, diff; 3887 size_t index = 0; 3888 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 3889 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3890 curData = *cur_data_pos; 3891 if(fabs(curData - mean) <= realPrecision){ 3892 // adjust type[index] to intvRadius for coherence with freq in reg 3893 type[index] = intvRadius; 3894 *cur_pb_pos = mean; 3895 } 3896 else 3897 { 3898 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 3899 diff = curData - pred2D; 3900 itvNum = fabs(diff)/realPrecision + 1; 3901 if (itvNum < intvCapacity_sz){ 3902 if (diff < 0) itvNum = -itvNum; 3903 type[index] = (int) (itvNum/2) + intvRadius; 3904 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 3905 if(type[index] <= intvRadius) type[index] -= 1; 3906 //ganrantee comporession error against the case of machine-epsilon 3907 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 3908 type[index] = 0; 3909 *cur_pb_pos = curData; 3910 unpredictable_data[unpredictable_count ++] = curData; 3911 } 3912 } 3913 else{ 3914 type[index] = 0; 3915 *cur_pb_pos = curData; 3916 unpredictable_data[unpredictable_count ++] = curData; 3917 } 3918 } 3919 index ++; 3920 cur_pb_pos ++; 3921 cur_data_pos ++; 3922 } 3923 cur_pb_pos += strip_dim0_offset - current_blockcount_y; 3924 cur_data_pos += dim0_offset - current_blockcount_y; 3925 } 3926 /*dealing with the last ii (boundary)*/ 3927 { 3928 // ii == current_blockcount_x - 1 3929 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3930 curData = *cur_data_pos; 3931 if(fabs(curData - mean) <= realPrecision){ 3932 // adjust type[index] to intvRadius for coherence with freq in reg 3933 type[index] = intvRadius; 3934 *cur_pb_pos = mean; 3935 } 3936 else 3937 { 3938 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 3939 diff = curData - pred2D; 3940 itvNum = fabs(diff)/realPrecision + 1; 3941 if (itvNum < intvCapacity_sz){ 3942 if (diff < 0) itvNum = -itvNum; 3943 type[index] = (int) (itvNum/2) + intvRadius; 3944 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 3945 if(type[index] <= intvRadius) type[index] -= 1; 3946 //ganrantee comporession error against the case of machine-epsilon 3947 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 3948 type[index] = 0; 3949 *cur_pb_pos = curData; 3950 unpredictable_data[unpredictable_count ++] = curData; 3951 } 3952 } 3953 else{ 3954 type[index] = 0; 3955 *cur_pb_pos = curData; 3956 unpredictable_data[unpredictable_count ++] = curData; 3957 } 3958 } 3959 next_pb_pos[jj] = *cur_pb_pos; 3960 index ++; 3961 cur_pb_pos ++; 3962 cur_data_pos ++; 3963 } 3964 } 3965 total_unpred += unpredictable_count; 3966 unpredictable_data += unpredictable_count; 3967 // change indicator 3968 indicator_pos[j] = 1; 3969 }// end SZ 3970 reg_params_pos ++; 3971 data_pos += current_blockcount_y; 3972 pb_pos += current_blockcount_y; 3973 next_pb_pos += current_blockcount_y; 3974 type += current_blockcount_x * current_blockcount_y; 3975 }// end j 3976 indicator_pos += num_y; 3977 double * tmp; 3978 tmp = cur_pb_buf; 3979 cur_pb_buf = next_pb_buf; 3980 next_pb_buf = tmp; 3981 }// end i 3982 }// end use mean 3983 else{ 3984 type = result_type; 3985 int intvCapacity_sz = intvCapacity - 2; 3986 for(size_t i=0; i<num_x; i++){ 3987 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3988 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3989 data_pos = oriData + offset_x * dim0_offset; 3990 3991 cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; 3992 next_pb_buf_pos = next_pb_buf + 1; 3993 double * pb_pos = cur_pb_buf_pos; 3994 double * next_pb_pos = next_pb_buf_pos; 3995 3996 for(size_t j=0; j<num_y; j++){ 3997 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3998 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3999 /*sampling*/ 4000 { 4001 // sample [2i + 1, 2i + 1] [2i + 1, bs - 2i] 4002 double * cur_data_pos; 4003 double curData; 4004 double pred_reg, pred_sz; 4005 double err_sz = 0.0, err_reg = 0.0; 4006 // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] 4007 // [1, 9] [3, 7] [7, 3] [9, 1] 4008 int count = 0; 4009 for(int i=1; i<current_blockcount_x; i+=2){ 4010 cur_data_pos = data_pos + i * dim0_offset + i; 4011 curData = *cur_data_pos; 4012 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4013 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; 4014 err_sz += fabs(pred_sz - curData); 4015 err_reg += fabs(pred_reg - curData); 4016 4017 cur_data_pos = data_pos + i * dim0_offset + (block_size - i); 4018 curData = *cur_data_pos; 4019 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4020 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; 4021 err_sz += fabs(pred_sz - curData); 4022 err_reg += fabs(pred_reg - curData); 4023 4024 count += 2; 4025 } 4026 err_sz += realPrecision * count * 0.81; 4027 use_reg = (err_reg < err_sz); 4028 4029 } 4030 if(use_reg) 4031 { 4032 { 4033 /*predict coefficients in current block via previous reg_block*/ 4034 double cur_coeff; 4035 double diff, itvNum; 4036 for(int e=0; e<3; e++){ 4037 cur_coeff = reg_params_pos[e*num_blocks]; 4038 diff = cur_coeff - last_coeffcients[e]; 4039 itvNum = fabs(diff)/precision[e] + 1; 4040 if (itvNum < coeff_intvCapacity_sz){ 4041 if (diff < 0) itvNum = -itvNum; 4042 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 4043 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 4044 //ganrantee comporession error against the case of machine-epsilon 4045 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 4046 coeff_type[e][coeff_index] = 0; 4047 last_coeffcients[e] = cur_coeff; 4048 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4049 } 4050 } 4051 else{ 4052 coeff_type[e][coeff_index] = 0; 4053 last_coeffcients[e] = cur_coeff; 4054 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4055 } 4056 } 4057 coeff_index ++; 4058 } 4059 double curData; 4060 double pred; 4061 double itvNum; 4062 double diff; 4063 size_t index = 0; 4064 size_t block_unpredictable_count = 0; 4065 double * cur_data_pos = data_pos; 4066 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4067 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4068 curData = *cur_data_pos; 4069 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4070 diff = curData - pred; 4071 itvNum = fabs(diff)/realPrecision + 1; 4072 if (itvNum < intvCapacity){ 4073 if (diff < 0) itvNum = -itvNum; 4074 type[index] = (int) (itvNum/2) + intvRadius; 4075 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4076 //ganrantee comporession error against the case of machine-epsilon 4077 if(fabs(curData - pred)>realPrecision){ 4078 type[index] = 0; 4079 pred = curData; 4080 unpredictable_data[block_unpredictable_count ++] = curData; 4081 } 4082 } 4083 else{ 4084 type[index] = 0; 4085 pred = curData; 4086 unpredictable_data[block_unpredictable_count ++] = curData; 4087 } 4088 index ++; 4089 cur_data_pos ++; 4090 } 4091 /*dealing with the last jj (boundary)*/ 4092 { 4093 // jj == current_blockcount_y - 1 4094 size_t jj = current_blockcount_y - 1; 4095 curData = *cur_data_pos; 4096 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4097 diff = curData - pred; 4098 itvNum = fabs(diff)/realPrecision + 1; 4099 if (itvNum < intvCapacity){ 4100 if (diff < 0) itvNum = -itvNum; 4101 type[index] = (int) (itvNum/2) + intvRadius; 4102 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4103 //ganrantee comporession error against the case of machine-epsilon 4104 if(fabs(curData - pred)>realPrecision){ 4105 type[index] = 0; 4106 pred = curData; 4107 unpredictable_data[block_unpredictable_count ++] = curData; 4108 } 4109 } 4110 else{ 4111 type[index] = 0; 4112 pred = curData; 4113 unpredictable_data[block_unpredictable_count ++] = curData; 4114 } 4115 4116 // assign value to block surfaces 4117 pb_pos[ii * strip_dim0_offset + jj] = pred; 4118 index ++; 4119 cur_data_pos ++; 4120 } 4121 cur_data_pos += dim0_offset - current_blockcount_y; 4122 } 4123 /*dealing with the last ii (boundary)*/ 4124 { 4125 size_t ii = current_blockcount_x - 1; 4126 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4127 curData = *cur_data_pos; 4128 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4129 diff = curData - pred; 4130 itvNum = fabs(diff)/realPrecision + 1; 4131 if (itvNum < intvCapacity){ 4132 if (diff < 0) itvNum = -itvNum; 4133 type[index] = (int) (itvNum/2) + intvRadius; 4134 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4135 //ganrantee comporession error against the case of machine-epsilon 4136 if(fabs(curData - pred)>realPrecision){ 4137 type[index] = 0; 4138 pred = curData; 4139 unpredictable_data[block_unpredictable_count ++] = curData; 4140 } 4141 } 4142 else{ 4143 type[index] = 0; 4144 pred = curData; 4145 unpredictable_data[block_unpredictable_count ++] = curData; 4146 } 4147 // assign value to next prediction buffer 4148 next_pb_pos[jj] = pred; 4149 index ++; 4150 cur_data_pos ++; 4151 } 4152 /*dealing with the last jj (boundary)*/ 4153 { 4154 // jj == current_blockcount_y - 1 4155 size_t jj = current_blockcount_y - 1; 4156 curData = *cur_data_pos; 4157 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4158 diff = curData - pred; 4159 itvNum = fabs(diff)/realPrecision + 1; 4160 if (itvNum < intvCapacity){ 4161 if (diff < 0) itvNum = -itvNum; 4162 type[index] = (int) (itvNum/2) + intvRadius; 4163 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4164 //ganrantee comporession error against the case of machine-epsilon 4165 if(fabs(curData - pred)>realPrecision){ 4166 type[index] = 0; 4167 pred = curData; 4168 unpredictable_data[block_unpredictable_count ++] = curData; 4169 } 4170 } 4171 else{ 4172 type[index] = 0; 4173 pred = curData; 4174 unpredictable_data[block_unpredictable_count ++] = curData; 4175 } 4176 4177 // assign value to block surfaces 4178 pb_pos[ii * strip_dim0_offset + jj] = pred; 4179 // assign value to next prediction buffer 4180 next_pb_pos[jj] = pred; 4181 4182 index ++; 4183 cur_data_pos ++; 4184 } 4185 } // end ii == -1 4186 unpredictable_count = block_unpredictable_count; 4187 total_unpred += unpredictable_count; 4188 unpredictable_data += unpredictable_count; 4189 reg_count ++; 4190 }// end use_reg 4191 else{ 4192 // use SZ 4193 // SZ predication 4194 unpredictable_count = 0; 4195 double * cur_pb_pos = pb_pos; 4196 double * cur_data_pos = data_pos; 4197 double curData; 4198 double pred2D; 4199 double itvNum, diff; 4200 size_t index = 0; 4201 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4202 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4203 curData = *cur_data_pos; 4204 4205 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4206 diff = curData - pred2D; 4207 itvNum = fabs(diff)/realPrecision + 1; 4208 if (itvNum < intvCapacity_sz){ 4209 if (diff < 0) itvNum = -itvNum; 4210 type[index] = (int) (itvNum/2) + intvRadius; 4211 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4212 //ganrantee comporession error against the case of machine-epsilon 4213 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4214 type[index] = 0; 4215 *cur_pb_pos = curData; 4216 unpredictable_data[unpredictable_count ++] = curData; 4217 } 4218 } 4219 else{ 4220 type[index] = 0; 4221 *cur_pb_pos = curData; 4222 unpredictable_data[unpredictable_count ++] = curData; 4223 } 4224 4225 index ++; 4226 cur_pb_pos ++; 4227 cur_data_pos ++; 4228 } 4229 cur_pb_pos += strip_dim0_offset - current_blockcount_y; 4230 cur_data_pos += dim0_offset - current_blockcount_y; 4231 } 4232 /*dealing with the last ii (boundary)*/ 4233 { 4234 // ii == current_blockcount_x - 1 4235 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4236 curData = *cur_data_pos; 4237 4238 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4239 diff = curData - pred2D; 4240 itvNum = fabs(diff)/realPrecision + 1; 4241 if (itvNum < intvCapacity_sz){ 4242 if (diff < 0) itvNum = -itvNum; 4243 type[index] = (int) (itvNum/2) + intvRadius; 4244 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4245 //ganrantee comporession error against the case of machine-epsilon 4246 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4247 type[index] = 0; 4248 *cur_pb_pos = curData; 4249 unpredictable_data[unpredictable_count ++] = curData; 4250 } 4251 } 4252 else{ 4253 type[index] = 0; 4254 *cur_pb_pos = curData; 4255 unpredictable_data[unpredictable_count ++] = curData; 4256 } 4257 next_pb_pos[jj] = *cur_pb_pos; 4258 index ++; 4259 cur_pb_pos ++; 4260 cur_data_pos ++; 4261 } 4262 } 4263 total_unpred += unpredictable_count; 4264 unpredictable_data += unpredictable_count; 4265 // change indicator 4266 indicator_pos[j] = 1; 4267 }// end SZ 4268 reg_params_pos ++; 4269 data_pos += current_blockcount_y; 4270 pb_pos += current_blockcount_y; 4271 next_pb_pos += current_blockcount_y; 4272 type += current_blockcount_x * current_blockcount_y; 4273 }// end j 4274 indicator_pos += num_y; 4275 double * tmp; 4276 tmp = cur_pb_buf; 4277 cur_pb_buf = next_pb_buf; 4278 next_pb_buf = tmp; 4279 }// end i 4280 } 4281 free(prediction_buffer_1); 4282 free(prediction_buffer_2); 4283 4284 int stateNum = 2*quantization_intervals; 4285 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 4286 4287 size_t nodeCount = 0; 4288 size_t i = 0; 4289 init(huffmanTree, result_type, num_elements); 4290 for (i = 0; i < stateNum; i++) 4291 if (huffmanTree->code[i]) nodeCount++; 4292 nodeCount = nodeCount*2-1; 4293 4294 unsigned char *treeBytes; 4295 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 4296 4297 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 4298 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 4299 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1); 4300 unsigned char * result_pos = result; 4301 initRandomAccessBytes(result_pos); 4302 result_pos += meta_data_offset; 4303 4304 sizeToBytes(result_pos, num_elements); 4305 result_pos += exe_params->SZ_SIZE_TYPE; 4306 4307 intToBytes_bigEndian(result_pos, block_size); 4308 result_pos += sizeof(int); 4309 doubleToBytes(result_pos, realPrecision); 4310 result_pos += sizeof(double); 4311 intToBytes_bigEndian(result_pos, quantization_intervals); 4312 result_pos += sizeof(int); 4313 intToBytes_bigEndian(result_pos, treeByteSize); 4314 result_pos += sizeof(int); 4315 intToBytes_bigEndian(result_pos, nodeCount); 4316 result_pos += sizeof(int); 4317 memcpy(result_pos, treeBytes, treeByteSize); 4318 result_pos += treeByteSize; 4319 free(treeBytes); 4320 4321 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 4322 result_pos += sizeof(unsigned char); 4323 memcpy(result_pos, &mean, sizeof(double)); 4324 result_pos += sizeof(double); 4325 4326 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 4327 result_pos += indicator_size; 4328 4329 //convert the lead/mid/resi to byte stream 4330 if(reg_count>0){ 4331 for(int e=0; e<3; e++){ 4332 int stateNum = 2*coeff_intvCapacity_sz; 4333 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 4334 size_t nodeCount = 0; 4335 init(huffmanTree, coeff_type[e], reg_count); 4336 size_t i = 0; 4337 for (i = 0; i < huffmanTree->stateNum; i++) 4338 if (huffmanTree->code[i]) nodeCount++; 4339 nodeCount = nodeCount*2-1; 4340 unsigned char *treeBytes; 4341 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 4342 doubleToBytes(result_pos, precision[e]); 4343 result_pos += sizeof(double); 4344 intToBytes_bigEndian(result_pos, coeff_intvRadius); 4345 result_pos += sizeof(int); 4346 intToBytes_bigEndian(result_pos, treeByteSize); 4347 result_pos += sizeof(int); 4348 intToBytes_bigEndian(result_pos, nodeCount); 4349 result_pos += sizeof(int); 4350 memcpy(result_pos, treeBytes, treeByteSize); 4351 result_pos += treeByteSize; 4352 free(treeBytes); 4353 size_t typeArray_size = 0; 4354 encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); 4355 sizeToBytes(result_pos, typeArray_size); 4356 result_pos += sizeof(size_t) + typeArray_size; 4357 intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); 4358 result_pos += sizeof(int); 4359 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double)); 4360 result_pos += coeff_unpredictable_count[e]*sizeof(double); 4361 SZ_ReleaseHuffman(huffmanTree); 4362 } 4363 } 4364 free(coeff_result_type); 4365 free(coeff_unpredictable_data); 4366 4367 //record the number of unpredictable data and also store them 4368 memcpy(result_pos, &total_unpred, sizeof(size_t)); 4369 result_pos += sizeof(size_t); 4370 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double)); 4371 result_pos += total_unpred * sizeof(double); 4372 size_t typeArray_size = 0; 4373 encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); 4374 result_pos += typeArray_size; 4375 4376 size_t totalEncodeSize = result_pos - result; 4377 free(indicator); 4378 free(result_unpredictable_data); 4379 free(result_type); 4380 free(reg_params); 4381 4382 SZ_ReleaseHuffman(huffmanTree); 4383 *comp_size = totalEncodeSize; 4384 4385 return result; 4386 } 4387 4388 unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ 4389 4390 unsigned int quantization_intervals; 4391 double sz_sample_correct_freq = -1;//0.5; //-1 4392 double dense_pos; 4393 double mean_flush_freq; 4394 unsigned char use_mean = 0; 4395 4396 // calculate block dims 4397 size_t num_x, num_y, num_z; 4398 size_t block_size = 6; 4399 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 4400 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 4401 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); 4402 4403 size_t split_index_x, split_index_y, split_index_z; 4404 size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; 4405 size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; 4406 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 4407 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 4408 SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); 4409 4410 size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; 4411 size_t num_blocks = num_x * num_y * num_z; 4412 size_t num_elements = r1 * r2 * r3; 4413 4414 size_t dim0_offset = r2 * r3; 4415 size_t dim1_offset = r3; 4416 4417 int * result_type = (int *) malloc(num_elements * sizeof(int)); 4418 size_t unpred_data_max_size = max_num_block_elements; 4419 double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); 4420 size_t total_unpred = 0; 4421 size_t unpredictable_count; 4422 size_t max_unpred_count = 0; 4423 double * data_pos = oriData; 4424 int * type = result_type; 4425 size_t type_offset; 4426 size_t offset_x, offset_y, offset_z; 4427 size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; 4428 4429 double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double)); 4430 double * reg_params_pos = reg_params; 4431 // move regression part out 4432 size_t params_offset_b = num_blocks; 4433 size_t params_offset_c = 2*num_blocks; 4434 size_t params_offset_d = 3*num_blocks; 4435 for(size_t i=0; i<num_x; i++){ 4436 for(size_t j=0; j<num_y; j++){ 4437 for(size_t k=0; k<num_z; k++){ 4438 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4439 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4440 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 4441 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4442 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4443 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 4444 4445 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 4446 /*Calculate regression coefficients*/ 4447 { 4448 double * cur_data_pos = data_pos; 4449 double fx = 0.0; 4450 double fy = 0.0; 4451 double fz = 0.0; 4452 double f = 0; 4453 double sum_x, sum_y; 4454 double curData; 4455 for(size_t i=0; i<current_blockcount_x; i++){ 4456 sum_x = 0; 4457 for(size_t j=0; j<current_blockcount_y; j++){ 4458 sum_y = 0; 4459 for(size_t k=0; k<current_blockcount_z; k++){ 4460 curData = *cur_data_pos; 4461 // f += curData; 4462 // fx += curData * i; 4463 // fy += curData * j; 4464 // fz += curData * k; 4465 sum_y += curData; 4466 fz += curData * k; 4467 cur_data_pos ++; 4468 } 4469 fy += sum_y * j; 4470 sum_x += sum_y; 4471 cur_data_pos += dim1_offset - current_blockcount_z; 4472 } 4473 fx += sum_x * i; 4474 f += sum_x; 4475 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4476 } 4477 double coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z); 4478 reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); 4479 reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); 4480 reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1); 4481 reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2); 4482 } 4483 reg_params_pos ++; 4484 } 4485 } 4486 } 4487 4488 //Compress coefficient arrays 4489 double precision_a, precision_b, precision_c, precision_d; 4490 double rel_param_err = 0.025; 4491 precision_a = rel_param_err * realPrecision / late_blockcount_x; 4492 precision_b = rel_param_err * realPrecision / late_blockcount_y; 4493 precision_c = rel_param_err * realPrecision / late_blockcount_z; 4494 precision_d = rel_param_err * realPrecision; 4495 4496 if(exe_params->optQuantMode==1) 4497 { 4498 quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 4499 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 4500 updateQuantizationInfo(quantization_intervals); 4501 } 4502 else{ 4503 quantization_intervals = exe_params->intvCapacity; 4504 } 4505 4506 double mean = 0; 4507 if(use_mean){ 4508 // compute mean 4509 double sum = 0.0; 4510 size_t mean_count = 0; 4511 for(size_t i=0; i<num_elements; i++){ 4512 if(fabs(oriData[i] - dense_pos) < realPrecision){ 4513 sum += oriData[i]; 4514 mean_count ++; 4515 } 4516 } 4517 if(mean_count > 0) mean = sum / mean_count; 4518 } 4519 4520 double tmp_realPrecision = realPrecision; 4521 4522 // use two prediction buffers for higher performance 4523 double * unpredictable_data = result_unpredictable_data; 4524 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 4525 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 4526 size_t reg_count = 0; 4527 size_t strip_dim_0 = early_blockcount_x + 1; 4528 size_t strip_dim_1 = r2 + 1; 4529 size_t strip_dim_2 = r3 + 1; 4530 size_t strip_dim0_offset = strip_dim_1 * strip_dim_2; 4531 size_t strip_dim1_offset = strip_dim_2; 4532 unsigned char * indicator_pos = indicator; 4533 4534 size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double); 4535 double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size); 4536 memset(prediction_buffer_1, 0, prediction_buffer_size); 4537 double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size); 4538 memset(prediction_buffer_2, 0, prediction_buffer_size); 4539 double * cur_pb_buf = prediction_buffer_1; 4540 double * next_pb_buf = prediction_buffer_2; 4541 double * cur_pb_buf_pos; 4542 double * next_pb_buf_pos; 4543 int intvCapacity = exe_params->intvCapacity; 4544 int intvRadius = exe_params->intvRadius; 4545 int use_reg = 0; 4546 double noise = realPrecision * 1.22; 4547 4548 reg_params_pos = reg_params; 4549 // compress the regression coefficients on the fly 4550 double last_coeffcients[4] = {0.0}; 4551 int coeff_intvCapacity_sz = 65536; 4552 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 4553 int * coeff_type[4]; 4554 int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); 4555 double * coeff_unpred_data[4]; 4556 double * coeff_unpredictable_data = (double *) malloc(num_blocks*4*sizeof(double)); 4557 double precision[4]; 4558 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; 4559 for(int i=0; i<4; i++){ 4560 coeff_type[i] = coeff_result_type + i * num_blocks; 4561 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 4562 } 4563 int coeff_index = 0; 4564 unsigned int coeff_unpredictable_count[4] = {0}; 4565 4566 if(use_mean){ 4567 int intvCapacity_sz = intvCapacity - 2; 4568 for(size_t i=0; i<num_x; i++){ 4569 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4570 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4571 for(size_t j=0; j<num_y; j++){ 4572 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4573 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4574 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; 4575 type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; 4576 type = result_type + type_offset; 4577 4578 // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) 4579 cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; 4580 next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; 4581 4582 size_t current_blockcount_z; 4583 double * pb_pos = cur_pb_buf_pos; 4584 double * next_pb_pos = next_pb_buf_pos; 4585 size_t strip_unpredictable_count = 0; 4586 for(size_t k=0; k<num_z; k++){ 4587 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 4588 4589 /*sampling and decide which predictor*/ 4590 { 4591 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 4592 double * cur_data_pos; 4593 double curData; 4594 double pred_reg, pred_sz; 4595 double err_sz = 0.0, err_reg = 0.0; 4596 int bmi = 0; 4597 if(i>0 && j>0 && k>0){ 4598 for(int i=0; i<block_size; i++){ 4599 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 4600 curData = *cur_data_pos; 4601 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4602 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4603 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4604 err_reg += fabs(pred_reg - curData); 4605 4606 bmi = block_size - i; 4607 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 4608 curData = *cur_data_pos; 4609 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4610 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4611 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4612 err_reg += fabs(pred_reg - curData); 4613 4614 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 4615 curData = *cur_data_pos; 4616 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4617 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4618 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4619 err_reg += fabs(pred_reg - curData); 4620 4621 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 4622 curData = *cur_data_pos; 4623 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4624 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4625 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4626 err_reg += fabs(pred_reg - curData); 4627 } 4628 } 4629 else{ 4630 for(int i=1; i<block_size; i++){ 4631 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 4632 curData = *cur_data_pos; 4633 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4634 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4635 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4636 err_reg += fabs(pred_reg - curData); 4637 4638 bmi = block_size - i; 4639 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 4640 curData = *cur_data_pos; 4641 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4642 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4643 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4644 err_reg += fabs(pred_reg - curData); 4645 4646 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 4647 curData = *cur_data_pos; 4648 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4649 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4650 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4651 err_reg += fabs(pred_reg - curData); 4652 4653 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 4654 curData = *cur_data_pos; 4655 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4656 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4657 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4658 err_reg += fabs(pred_reg - curData); 4659 4660 } 4661 } 4662 use_reg = (err_reg < err_sz); 4663 } 4664 if(use_reg){ 4665 { 4666 /*predict coefficients in current block via previous reg_block*/ 4667 double cur_coeff; 4668 double diff, itvNum; 4669 for(int e=0; e<4; e++){ 4670 cur_coeff = reg_params_pos[e*num_blocks]; 4671 diff = cur_coeff - last_coeffcients[e]; 4672 itvNum = fabs(diff)/precision[e] + 1; 4673 if (itvNum < coeff_intvCapacity_sz){ 4674 if (diff < 0) itvNum = -itvNum; 4675 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 4676 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 4677 //ganrantee comporession error against the case of machine-epsilon 4678 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 4679 coeff_type[e][coeff_index] = 0; 4680 last_coeffcients[e] = cur_coeff; 4681 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4682 } 4683 } 4684 else{ 4685 coeff_type[e][coeff_index] = 0; 4686 last_coeffcients[e] = cur_coeff; 4687 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4688 } 4689 } 4690 coeff_index ++; 4691 } 4692 double curData; 4693 double pred; 4694 double itvNum; 4695 double diff; 4696 size_t index = 0; 4697 size_t block_unpredictable_count = 0; 4698 double * cur_data_pos = data_pos; 4699 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4700 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4701 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4702 curData = *cur_data_pos; 4703 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 4704 diff = curData - pred; 4705 itvNum = fabs(diff)/tmp_realPrecision + 1; 4706 if (itvNum < intvCapacity){ 4707 if (diff < 0) itvNum = -itvNum; 4708 type[index] = (int) (itvNum/2) + intvRadius; 4709 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4710 //ganrantee comporession error against the case of machine-epsilon 4711 if(fabs(curData - pred)>tmp_realPrecision){ 4712 type[index] = 0; 4713 pred = curData; 4714 unpredictable_data[block_unpredictable_count ++] = curData; 4715 } 4716 } 4717 else{ 4718 type[index] = 0; 4719 pred = curData; 4720 unpredictable_data[block_unpredictable_count ++] = curData; 4721 } 4722 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 4723 // assign value to block surfaces 4724 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 4725 } 4726 index ++; 4727 cur_data_pos ++; 4728 } 4729 cur_data_pos += dim1_offset - current_blockcount_z; 4730 } 4731 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4732 } 4733 /*dealing with the last ii (boundary)*/ 4734 { 4735 // ii == current_blockcount_x - 1 4736 size_t ii = current_blockcount_x - 1; 4737 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4738 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4739 curData = *cur_data_pos; 4740 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 4741 diff = curData - pred; 4742 itvNum = fabs(diff)/tmp_realPrecision + 1; 4743 if (itvNum < intvCapacity){ 4744 if (diff < 0) itvNum = -itvNum; 4745 type[index] = (int) (itvNum/2) + intvRadius; 4746 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4747 //ganrantee comporession error against the case of machine-epsilon 4748 if(fabs(curData - pred)>tmp_realPrecision){ 4749 type[index] = 0; 4750 pred = curData; 4751 unpredictable_data[block_unpredictable_count ++] = curData; 4752 } 4753 } 4754 else{ 4755 type[index] = 0; 4756 pred = curData; 4757 unpredictable_data[block_unpredictable_count ++] = curData; 4758 } 4759 4760 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 4761 // assign value to block surfaces 4762 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 4763 } 4764 // assign value to next prediction buffer 4765 next_pb_pos[jj * strip_dim1_offset + kk] = pred; 4766 index ++; 4767 cur_data_pos ++; 4768 } 4769 cur_data_pos += dim1_offset - current_blockcount_z; 4770 } 4771 } 4772 unpredictable_count = block_unpredictable_count; 4773 strip_unpredictable_count += unpredictable_count; 4774 unpredictable_data += unpredictable_count; 4775 4776 reg_count ++; 4777 } 4778 else{ 4779 // use SZ 4780 // SZ predication 4781 unpredictable_count = 0; 4782 double * cur_pb_pos = pb_pos; 4783 double * cur_data_pos = data_pos; 4784 double curData; 4785 double pred3D; 4786 double itvNum, diff; 4787 size_t index = 0; 4788 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4789 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4790 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4791 4792 curData = *cur_data_pos; 4793 if(fabs(curData - mean) <= realPrecision){ 4794 // adjust type[index] to intvRadius for coherence with freq in reg 4795 type[index] = intvRadius; 4796 *cur_pb_pos = mean; 4797 } 4798 else 4799 { 4800 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 4801 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 4802 diff = curData - pred3D; 4803 itvNum = fabs(diff)/realPrecision + 1; 4804 if (itvNum < intvCapacity_sz){ 4805 if (diff < 0) itvNum = -itvNum; 4806 type[index] = (int) (itvNum/2) + intvRadius; 4807 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4808 if(type[index] <= intvRadius) type[index] -= 1; 4809 //ganrantee comporession error against the case of machine-epsilon 4810 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4811 type[index] = 0; 4812 *cur_pb_pos = curData; 4813 unpredictable_data[unpredictable_count ++] = curData; 4814 } 4815 } 4816 else{ 4817 type[index] = 0; 4818 *cur_pb_pos = curData; 4819 unpredictable_data[unpredictable_count ++] = curData; 4820 } 4821 } 4822 index ++; 4823 cur_pb_pos ++; 4824 cur_data_pos ++; 4825 } 4826 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 4827 cur_data_pos += dim1_offset - current_blockcount_z; 4828 } 4829 cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; 4830 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4831 } 4832 /*dealing with the last ii (boundary)*/ 4833 { 4834 // ii == current_blockcount_x - 1 4835 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4836 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4837 4838 curData = *cur_data_pos; 4839 if(fabs(curData - mean) <= realPrecision){ 4840 // adjust type[index] to intvRadius for coherence with freq in reg 4841 type[index] = intvRadius; 4842 *cur_pb_pos = mean; 4843 } 4844 else 4845 { 4846 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 4847 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 4848 diff = curData - pred3D; 4849 itvNum = fabs(diff)/realPrecision + 1; 4850 if (itvNum < intvCapacity_sz){ 4851 if (diff < 0) itvNum = -itvNum; 4852 type[index] = (int) (itvNum/2) + intvRadius; 4853 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4854 if(type[index] <= intvRadius) type[index] -= 1; 4855 //ganrantee comporession error against the case of machine-epsilon 4856 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4857 type[index] = 0; 4858 *cur_pb_pos = curData; 4859 unpredictable_data[unpredictable_count ++] = curData; 4860 } 4861 } 4862 else{ 4863 type[index] = 0; 4864 *cur_pb_pos = curData; 4865 unpredictable_data[unpredictable_count ++] = curData; 4866 } 4867 } 4868 next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; 4869 index ++; 4870 cur_pb_pos ++; 4871 cur_data_pos ++; 4872 } 4873 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 4874 cur_data_pos += dim1_offset - current_blockcount_z; 4875 } 4876 } 4877 strip_unpredictable_count += unpredictable_count; 4878 unpredictable_data += unpredictable_count; 4879 // change indicator 4880 indicator_pos[k] = 1; 4881 }// end SZ 4882 4883 reg_params_pos ++; 4884 data_pos += current_blockcount_z; 4885 pb_pos += current_blockcount_z; 4886 next_pb_pos += current_blockcount_z; 4887 type += current_blockcount_x * current_blockcount_y * current_blockcount_z; 4888 4889 } // end k 4890 4891 if(strip_unpredictable_count > max_unpred_count){ 4892 max_unpred_count = strip_unpredictable_count; 4893 } 4894 total_unpred += strip_unpredictable_count; 4895 indicator_pos += num_z; 4896 }// end j 4897 double * tmp; 4898 tmp = cur_pb_buf; 4899 cur_pb_buf = next_pb_buf; 4900 next_pb_buf = tmp; 4901 }// end i 4902 } 4903 else{ 4904 int intvCapacity_sz = intvCapacity - 2; 4905 for(size_t i=0; i<num_x; i++){ 4906 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4907 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4908 4909 for(size_t j=0; j<num_y; j++){ 4910 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4911 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4912 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; 4913 // copy bottom plane from plane buffer 4914 // memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(double)); 4915 type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; 4916 type = result_type + type_offset; 4917 4918 // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) 4919 cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; 4920 next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; 4921 4922 size_t current_blockcount_z; 4923 double * pb_pos = cur_pb_buf_pos; 4924 double * next_pb_pos = next_pb_buf_pos; 4925 size_t strip_unpredictable_count = 0; 4926 for(size_t k=0; k<num_z; k++){ 4927 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 4928 /*sampling*/ 4929 { 4930 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 4931 double * cur_data_pos; 4932 double curData; 4933 double pred_reg, pred_sz; 4934 double err_sz = 0.0, err_reg = 0.0; 4935 int bmi; 4936 if(i>0 && j>0 && k>0){ 4937 for(int i=0; i<block_size; i++){ 4938 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 4939 curData = *cur_data_pos; 4940 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4941 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4942 err_sz += fabs(pred_sz - curData) + noise; 4943 err_reg += fabs(pred_reg - curData); 4944 4945 bmi = block_size - i; 4946 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 4947 curData = *cur_data_pos; 4948 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4949 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4950 err_sz += fabs(pred_sz - curData) + noise; 4951 err_reg += fabs(pred_reg - curData); 4952 4953 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 4954 curData = *cur_data_pos; 4955 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4956 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4957 err_sz += fabs(pred_sz - curData) + noise; 4958 err_reg += fabs(pred_reg - curData); 4959 4960 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 4961 curData = *cur_data_pos; 4962 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4963 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4964 err_sz += fabs(pred_sz - curData) + noise; 4965 err_reg += fabs(pred_reg - curData); 4966 } 4967 } 4968 else{ 4969 for(int i=1; i<block_size; i++){ 4970 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 4971 curData = *cur_data_pos; 4972 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4973 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4974 err_sz += fabs(pred_sz - curData) + noise; 4975 err_reg += fabs(pred_reg - curData); 4976 4977 bmi = block_size - i; 4978 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 4979 curData = *cur_data_pos; 4980 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4981 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4982 err_sz += fabs(pred_sz - curData) + noise; 4983 err_reg += fabs(pred_reg - curData); 4984 4985 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 4986 curData = *cur_data_pos; 4987 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4988 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4989 err_sz += fabs(pred_sz - curData) + noise; 4990 err_reg += fabs(pred_reg - curData); 4991 4992 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 4993 curData = *cur_data_pos; 4994 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4995 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4996 err_sz += fabs(pred_sz - curData) + noise; 4997 err_reg += fabs(pred_reg - curData); 4998 } 4999 } 5000 use_reg = (err_reg < err_sz); 5001 5002 } 5003 if(use_reg) 5004 { 5005 { 5006 /*predict coefficients in current block via previous reg_block*/ 5007 double cur_coeff; 5008 double diff, itvNum; 5009 for(int e=0; e<4; e++){ 5010 cur_coeff = reg_params_pos[e*num_blocks]; 5011 diff = cur_coeff - last_coeffcients[e]; 5012 itvNum = fabs(diff)/precision[e] + 1; 5013 if (itvNum < coeff_intvCapacity_sz){ 5014 if (diff < 0) itvNum = -itvNum; 5015 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 5016 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 5017 //ganrantee comporession error against the case of machine-epsilon 5018 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 5019 coeff_type[e][coeff_index] = 0; 5020 last_coeffcients[e] = cur_coeff; 5021 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5022 } 5023 } 5024 else{ 5025 coeff_type[e][coeff_index] = 0; 5026 last_coeffcients[e] = cur_coeff; 5027 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5028 } 5029 } 5030 coeff_index ++; 5031 } 5032 double curData; 5033 double pred; 5034 double itvNum; 5035 double diff; 5036 size_t index = 0; 5037 size_t block_unpredictable_count = 0; 5038 double * cur_data_pos = data_pos; 5039 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5040 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5041 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5042 5043 curData = *cur_data_pos; 5044 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5045 diff = curData - pred; 5046 itvNum = fabs(diff)/tmp_realPrecision + 1; 5047 if (itvNum < intvCapacity){ 5048 if (diff < 0) itvNum = -itvNum; 5049 type[index] = (int) (itvNum/2) + intvRadius; 5050 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5051 //ganrantee comporession error against the case of machine-epsilon 5052 if(fabs(curData - pred)>tmp_realPrecision){ 5053 type[index] = 0; 5054 pred = curData; 5055 unpredictable_data[block_unpredictable_count ++] = curData; 5056 } 5057 } 5058 else{ 5059 type[index] = 0; 5060 pred = curData; 5061 unpredictable_data[block_unpredictable_count ++] = curData; 5062 } 5063 5064 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5065 // assign value to block surfaces 5066 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5067 } 5068 index ++; 5069 cur_data_pos ++; 5070 } 5071 cur_data_pos += dim1_offset - current_blockcount_z; 5072 } 5073 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5074 } 5075 /*dealing with the last ii (boundary)*/ 5076 { 5077 // ii == current_blockcount_x - 1 5078 size_t ii = current_blockcount_x - 1; 5079 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5080 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5081 curData = *cur_data_pos; 5082 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5083 diff = curData - pred; 5084 itvNum = fabs(diff)/tmp_realPrecision + 1; 5085 if (itvNum < intvCapacity){ 5086 if (diff < 0) itvNum = -itvNum; 5087 type[index] = (int) (itvNum/2) + intvRadius; 5088 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5089 //ganrantee comporession error against the case of machine-epsilon 5090 if(fabs(curData - pred)>tmp_realPrecision){ 5091 type[index] = 0; 5092 pred = curData; 5093 unpredictable_data[block_unpredictable_count ++] = curData; 5094 } 5095 } 5096 else{ 5097 type[index] = 0; 5098 pred = curData; 5099 unpredictable_data[block_unpredictable_count ++] = curData; 5100 } 5101 5102 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5103 // assign value to block surfaces 5104 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5105 } 5106 // assign value to next prediction buffer 5107 next_pb_pos[jj * strip_dim1_offset + kk] = pred; 5108 index ++; 5109 cur_data_pos ++; 5110 } 5111 cur_data_pos += dim1_offset - current_blockcount_z; 5112 } 5113 } 5114 unpredictable_count = block_unpredictable_count; 5115 strip_unpredictable_count += unpredictable_count; 5116 unpredictable_data += unpredictable_count; 5117 reg_count ++; 5118 } 5119 else{ 5120 // use SZ 5121 // SZ predication 5122 unpredictable_count = 0; 5123 double * cur_pb_pos = pb_pos; 5124 double * cur_data_pos = data_pos; 5125 double curData; 5126 double pred3D; 5127 double itvNum, diff; 5128 size_t index = 0; 5129 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5130 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5131 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5132 5133 curData = *cur_data_pos; 5134 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 5135 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5136 diff = curData - pred3D; 5137 itvNum = fabs(diff)/realPrecision + 1; 5138 if (itvNum < intvCapacity_sz){ 5139 if (diff < 0) itvNum = -itvNum; 5140 type[index] = (int) (itvNum/2) + intvRadius; 5141 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5142 //ganrantee comporession error against the case of machine-epsilon 5143 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5144 type[index] = 0; 5145 *cur_pb_pos = curData; 5146 unpredictable_data[unpredictable_count ++] = curData; 5147 } 5148 } 5149 else{ 5150 type[index] = 0; 5151 *cur_pb_pos = curData; 5152 unpredictable_data[unpredictable_count ++] = curData; 5153 } 5154 index ++; 5155 cur_pb_pos ++; 5156 cur_data_pos ++; 5157 } 5158 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5159 cur_data_pos += dim1_offset - current_blockcount_z; 5160 } 5161 cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; 5162 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5163 } 5164 /*dealing with the last ii (boundary)*/ 5165 { 5166 // ii == current_blockcount_x - 1 5167 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5168 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5169 5170 curData = *cur_data_pos; 5171 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 5172 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5173 diff = curData - pred3D; 5174 itvNum = fabs(diff)/realPrecision + 1; 5175 if (itvNum < intvCapacity_sz){ 5176 if (diff < 0) itvNum = -itvNum; 5177 type[index] = (int) (itvNum/2) + intvRadius; 5178 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5179 //ganrantee comporession error against the case of machine-epsilon 5180 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5181 type[index] = 0; 5182 *cur_pb_pos = curData; 5183 unpredictable_data[unpredictable_count ++] = curData; 5184 } 5185 } 5186 else{ 5187 type[index] = 0; 5188 *cur_pb_pos = curData; 5189 unpredictable_data[unpredictable_count ++] = curData; 5190 } 5191 // assign value to next prediction buffer 5192 next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; 5193 index ++; 5194 cur_pb_pos ++; 5195 cur_data_pos ++; 5196 } 5197 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5198 cur_data_pos += dim1_offset - current_blockcount_z; 5199 } 5200 } 5201 strip_unpredictable_count += unpredictable_count; 5202 unpredictable_data += unpredictable_count; 5203 // change indicator 5204 indicator_pos[k] = 1; 5205 }// end SZ 5206 5207 reg_params_pos ++; 5208 data_pos += current_blockcount_z; 5209 pb_pos += current_blockcount_z; 5210 next_pb_pos += current_blockcount_z; 5211 type += current_blockcount_x * current_blockcount_y * current_blockcount_z; 5212 5213 } 5214 5215 if(strip_unpredictable_count > max_unpred_count){ 5216 max_unpred_count = strip_unpredictable_count; 5217 } 5218 total_unpred += strip_unpredictable_count; 5219 indicator_pos += num_z; 5220 } 5221 double * tmp; 5222 tmp = cur_pb_buf; 5223 cur_pb_buf = next_pb_buf; 5224 next_pb_buf = tmp; 5225 } 5226 } 5227 5228 free(prediction_buffer_1); 5229 free(prediction_buffer_2); 5230 5231 int stateNum = 2*quantization_intervals; 5232 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 5233 5234 size_t nodeCount = 0; 5235 init(huffmanTree, result_type, num_elements); 5236 size_t i = 0; 5237 for (i = 0; i < huffmanTree->stateNum; i++) 5238 if (huffmanTree->code[i]) nodeCount++; 5239 nodeCount = nodeCount*2-1; 5240 5241 unsigned char *treeBytes; 5242 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 5243 5244 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 5245 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 5246 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1); 5247 unsigned char * result_pos = result; 5248 initRandomAccessBytes(result_pos); 5249 5250 result_pos += meta_data_offset; 5251 5252 sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 5253 result_pos += exe_params->SZ_SIZE_TYPE; 5254 5255 intToBytes_bigEndian(result_pos, block_size); 5256 result_pos += sizeof(int); 5257 doubleToBytes(result_pos, realPrecision); 5258 result_pos += sizeof(double); 5259 intToBytes_bigEndian(result_pos, quantization_intervals); 5260 result_pos += sizeof(int); 5261 intToBytes_bigEndian(result_pos, treeByteSize); 5262 result_pos += sizeof(int); 5263 intToBytes_bigEndian(result_pos, nodeCount); 5264 result_pos += sizeof(int); 5265 memcpy(result_pos, treeBytes, treeByteSize); 5266 result_pos += treeByteSize; 5267 free(treeBytes); 5268 5269 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 5270 result_pos += sizeof(unsigned char); 5271 memcpy(result_pos, &mean, sizeof(double)); 5272 result_pos += sizeof(double); 5273 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 5274 result_pos += indicator_size; 5275 5276 //convert the lead/mid/resi to byte stream 5277 if(reg_count > 0){ 5278 for(int e=0; e<4; e++){ 5279 int stateNum = 2*coeff_intvCapacity_sz; 5280 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 5281 size_t nodeCount = 0; 5282 init(huffmanTree, coeff_type[e], reg_count); 5283 size_t i = 0; 5284 for (i = 0; i < huffmanTree->stateNum; i++) 5285 if (huffmanTree->code[i]) nodeCount++; 5286 nodeCount = nodeCount*2-1; 5287 unsigned char *treeBytes; 5288 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 5289 doubleToBytes(result_pos, precision[e]); 5290 result_pos += sizeof(double); 5291 intToBytes_bigEndian(result_pos, coeff_intvRadius); 5292 result_pos += sizeof(int); 5293 intToBytes_bigEndian(result_pos, treeByteSize); 5294 result_pos += sizeof(int); 5295 intToBytes_bigEndian(result_pos, nodeCount); 5296 result_pos += sizeof(int); 5297 memcpy(result_pos, treeBytes, treeByteSize); 5298 result_pos += treeByteSize; 5299 free(treeBytes); 5300 size_t typeArray_size = 0; 5301 encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); 5302 sizeToBytes(result_pos, typeArray_size); 5303 result_pos += sizeof(size_t) + typeArray_size; 5304 intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); 5305 result_pos += sizeof(int); 5306 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double)); 5307 result_pos += coeff_unpredictable_count[e]*sizeof(double); 5308 SZ_ReleaseHuffman(huffmanTree); 5309 } 5310 } 5311 free(coeff_result_type); 5312 free(coeff_unpredictable_data); 5313 5314 //record the number of unpredictable data and also store them 5315 memcpy(result_pos, &total_unpred, sizeof(size_t)); 5316 result_pos += sizeof(size_t); 5317 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double)); 5318 result_pos += total_unpred * sizeof(double); 5319 size_t typeArray_size = 0; 5320 encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); 5321 result_pos += typeArray_size; 5322 size_t totalEncodeSize = result_pos - result; 5323 free(indicator); 5324 free(result_unpredictable_data); 5325 free(result_type); 5326 free(reg_params); 5327 5328 5329 SZ_ReleaseHuffman(huffmanTree); 5330 *comp_size = totalEncodeSize; 5331 return result; 5332 }
Note: See TracChangeset
for help on using the changeset viewer.