1 | /********************************************************************* |
---|
2 | Blosc - Blocked Shuffling and Compression Library |
---|
3 | |
---|
4 | Author: Francesc Alted <[email protected]> |
---|
5 | |
---|
6 | See LICENSES/BLOSC.txt for details about copyright and rights to use. |
---|
7 | **********************************************************************/ |
---|
8 | |
---|
9 | #include "bitshuffle-generic.h" |
---|
10 | |
---|
11 | |
---|
/* Transpose bytes within elements, starting partway through the input.
 *
 * `in` is read as `size` elements of `elem_size` bytes each.  Byte b of
 * element e is written to out[b * size + e], for every element index e
 * from `start` up to `size`.  Elements before `start` are not touched.
 *
 * `start` goes through CHECK_MULT_EIGHT (macro from bitshuffle-generic.h;
 * presumably it makes the function return an error code when the value is
 * not a multiple of 8 -- confirm against the header).
 *
 * When size <= start nothing is copied.  In all non-error cases the return
 * value is size * elem_size.
 */
int64_t bshuf_trans_byte_elem_remainder(void* in, void* out, const size_t size,
         const size_t elem_size, const size_t start) {

    const char* src = (const char*) in;
    char* dst = (char*) out;
    size_t elem, byte, lane;

    CHECK_MULT_EIGHT(start);

    if (size <= start) {
        return size * elem_size;
    }

    /* Full groups of eight elements.  The innermost loop has a fixed trip
     * count of 8 so the compiler is free to unroll it. */
    for (elem = start; elem + 7 < size; elem += 8) {
        const char* group = src + elem * elem_size;
        for (byte = 0; byte < elem_size; byte++) {
            char* row = dst + byte * size + elem;
            for (lane = 0; lane < 8; lane++) {
                row[lane] = group[lane * elem_size + byte];
            }
        }
    }

    /* Leftover elements past the last full group of eight. */
    for (elem = size - size % 8; elem < size; elem++) {
        const char* one = src + elem * elem_size;
        for (byte = 0; byte < elem_size; byte++) {
            dst[byte * size + elem] = one[byte];
        }
    }

    return size * elem_size;
}
---|
41 | |
---|
42 | |
---|
/* Transpose bytes within elements over the whole input.
 *
 * Forwards to bshuf_trans_byte_elem_remainder with a start offset of 0 and
 * returns that call's result unchanged.
 */
int64_t bshuf_trans_byte_elem_scal(void* in, void* out, const size_t size,
         const size_t elem_size) {

    const size_t first_elem = 0;

    return bshuf_trans_byte_elem_remainder(in, out, size, elem_size,
                                           first_elem);
}
---|
49 | |
---|
50 | |
---|
/* Transpose bits within bytes, starting partway through the input.
 *
 * The input is consumed as consecutive 8-byte words.  Every word from index
 * start_byte / 8 onwards is run through TRANS_BIT_8X8 and its eight result
 * bytes are scattered into `out`: byte k (least significant first) of word w
 * is stored at out[k * (nbyte / 8) + w], where nbyte = size * elem_size.
 *
 * Both nbyte and start_byte go through CHECK_MULT_EIGHT (macro from
 * bitshuffle-generic.h; presumably it makes the function return an error
 * code when the value is not a multiple of 8 -- confirm against the header).
 *
 * In all non-error cases the return value is size * elem_size.
 */
int64_t bshuf_trans_bit_byte_remainder(void* in, void* out, const size_t size,
         const size_t elem_size, const size_t start_byte) {

    const unsigned char* in_b = (const unsigned char*) in;
    uint8_t* out_b = (uint8_t*) out;

    int64_t x, t;

    size_t nbyte = elem_size * size;
    size_t nbyte_bitrow = nbyte / 8;
    size_t ii;
    size_t kk;

    CHECK_MULT_EIGHT(nbyte);
    CHECK_MULT_EIGHT(start_byte);

    for (ii = start_byte / 8; ii < nbyte_bitrow; ii++) {
        /* Load the word with memcpy rather than through an int64_t*: the
         * caller's buffer is a void* with no alignment guarantee, and a
         * direct dereference would be undefined for misaligned input and
         * would break the effective-type (strict aliasing) rule. */
        memcpy(&x, in_b + ii * sizeof(x), sizeof(x));
        TRANS_BIT_8X8(x, t);
        for (kk = 0; kk < 8; kk++) {
            /* Explicit narrowing: keep only the low byte of the word. */
            out_b[kk * nbyte_bitrow + ii] = (uint8_t) x;
            x = x >> 8;
        }
    }
    return size * elem_size;
}
---|
78 | |
---|
79 | |
---|
/* Transpose bits within bytes over the whole input.
 *
 * Forwards to bshuf_trans_bit_byte_remainder with a starting byte offset of
 * 0 and returns that call's result unchanged.
 */
int64_t bshuf_trans_bit_byte_scal(void* in, void* out, const size_t size,
         const size_t elem_size) {

    const size_t first_byte = 0;

    return bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
                                          first_byte);
}
---|
86 | |
---|
87 | |
---|
/* General transpose of an array of fixed-size elements.
 *
 * `in` is treated as an lda x ldb matrix (row-major) whose entries are
 * elem_size bytes wide.  Entry (r, c) of the input is copied with memcpy to
 * entry (c, r) of `out`, which is therefore laid out as ldb x lda.
 *
 * Returns lda * ldb * elem_size.
 */
int64_t bshuf_trans_elem(void* in, void* out, const size_t lda,
         const size_t ldb, const size_t elem_size) {

    const char* src = (const char*) in;
    char* dst = (char*) out;
    size_t row, col;

    for (row = 0; row < lda; row++) {
        /* Walk the input row sequentially; only the output is strided. */
        const char* src_elem = src + row * ldb * elem_size;
        for (col = 0; col < ldb; col++) {
            memcpy(dst + (col * lda + row) * elem_size, src_elem, elem_size);
            src_elem += elem_size;
        }
    }
    return lda * ldb * elem_size;
}
---|
103 | |
---|
104 | |
---|
/* Transpose rows of shuffled bits (size / 8 bytes each) within groups of 8.
 *
 * Implemented as a call to bshuf_trans_elem on an 8 x elem_size matrix whose
 * entries are size / 8 bytes wide; that call's result is returned unchanged.
 *
 * `size` goes through CHECK_MULT_EIGHT first (macro from
 * bitshuffle-generic.h; presumably it makes the function return an error
 * code when the value is not a multiple of 8 -- confirm against the header).
 */
int64_t bshuf_trans_bitrow_eight(void* in, void* out, const size_t size,
         const size_t elem_size) {

    size_t row_bytes;

    CHECK_MULT_EIGHT(size);

    row_bytes = size / 8;
    return bshuf_trans_elem(in, out, 8, elem_size, row_bytes);
}
---|
115 | |
---|
116 | |
---|
/* Transpose bits within elements.
 *
 * Runs three passes in sequence, using `out` and `tmp_buf` as alternating
 * destinations:
 *   1. bshuf_trans_byte_elem_scal   in      -> out
 *   2. bshuf_trans_bit_byte_scal    out     -> tmp_buf
 *   3. bshuf_trans_bitrow_eight     tmp_buf -> out
 *
 * `size` goes through CHECK_MULT_EIGHT and the results of the first two
 * passes go through CHECK_ERR (both macros come from bitshuffle-generic.h;
 * presumably they return early with an error code -- confirm against the
 * header).  The result of the third pass is returned as-is.
 */
int64_t bshuf_trans_bit_elem_scal(void* in, void* out, const size_t size,
         const size_t elem_size, void* tmp_buf) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    /* Pass 1: byte transpose. */
    count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
    CHECK_ERR(count);

    /* Pass 2: bit transpose inside each 8-byte word. */
    count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size);
    CHECK_ERR(count);

    /* Pass 3: regroup the bit rows; its status goes straight to the caller. */
    return bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);
}
---|
133 | |
---|
134 | |
---|
/* For data organized into a row for each bit (8 * elem_size rows of
 * size / 8 bytes), transpose the bytes.
 *
 * For every byte position b in [0, elem_size), column c in [0, size / 8) and
 * bit row k in [0, 8):
 *     out[(c * elem_size + b) * 8 + k] = in[(b * 8 + k) * (size / 8) + c]
 *
 * `size` goes through CHECK_MULT_EIGHT (macro from bitshuffle-generic.h;
 * presumably it makes the function return an error code when the value is
 * not a multiple of 8 -- confirm against the header).
 *
 * In all non-error cases the return value is size * elem_size.
 */
int64_t bshuf_trans_byte_bitrow_scal(void* in, void* out, const size_t size,
         const size_t elem_size) {
    const char* src = (const char*) in;
    char* dst = (char*) out;

    const size_t row_bytes = size / 8;
    size_t byte, col, bit;

    CHECK_MULT_EIGHT(size);

    for (byte = 0; byte < elem_size; byte++) {
        /* First of the eight bit rows that belong to this byte position. */
        const char* rows = src + byte * 8 * row_bytes;
        for (col = 0; col < row_bytes; col++) {
            /* Eight consecutive output bytes for this (column, byte) pair. */
            char* group = dst + (col * elem_size + byte) * 8;
            for (bit = 0; bit < 8; bit++) {
                group[bit] = rows[bit * row_bytes + col];
            }
        }
    }
    return size * elem_size;
}
---|
157 | |
---|
158 | |
---|
/* Shuffle bits within the bytes of eight element blocks.
 *
 * The input is processed in blocks of 8 * elem_size bytes.  Within each
 * block, every 8-byte word (at offset jj = 0, 8, 16, ...) is loaded, run
 * through TRANS_BIT_8X8, and its eight result bytes (least significant
 * first) are stored at out[block_start + jj / 8 + k * elem_size] for
 * k = 0..7.
 *
 * `size` goes through CHECK_MULT_EIGHT (macro from bitshuffle-generic.h;
 * presumably it makes the function return an error code when the value is
 * not a multiple of 8 -- confirm against the header).
 *
 * In all non-error cases the return value is size * elem_size.
 */
int64_t bshuf_shuffle_bit_eightelem_scal(void* in, void* out,
         const size_t size, const size_t elem_size) {
    const unsigned char* in_b = (const unsigned char*) in;
    uint8_t* out_b = (uint8_t*) out;
    size_t nbyte = elem_size * size;
    size_t block_bytes = 8 * elem_size;
    int64_t x, t;
    size_t jj, ii, kk;

    CHECK_MULT_EIGHT(size);

    for (jj = 0; jj < block_bytes; jj += 8) {
        for (ii = 0; ii + block_bytes - 1 < nbyte; ii += block_bytes) {
            /* Load the word with memcpy rather than by casting the byte
             * pointer to int64_t*: the buffer has no alignment guarantee,
             * so a direct dereference would be undefined for misaligned
             * input and would break the effective-type rule. */
            memcpy(&x, &in_b[ii + jj], sizeof(x));
            TRANS_BIT_8X8(x, t);
            for (kk = 0; kk < 8; kk++) {
                /* Explicit narrowing: keep only the low byte of the word. */
                out_b[ii + jj / 8 + kk * elem_size] = (uint8_t) x;
                x = x >> 8;
            }
        }
    }
    return size * elem_size;
}
---|
182 | |
---|
183 | |
---|
/* Untranspose bits within elements.
 *
 * Runs two passes in sequence:
 *   1. bshuf_trans_byte_bitrow_scal       in      -> tmp_buf
 *   2. bshuf_shuffle_bit_eightelem_scal   tmp_buf -> out
 *
 * `size` goes through CHECK_MULT_EIGHT and the result of the first pass goes
 * through CHECK_ERR (both macros come from bitshuffle-generic.h; presumably
 * they return early with an error code -- confirm against the header).  The
 * result of the second pass is returned as-is.
 */
int64_t bshuf_untrans_bit_elem_scal(void* in, void* out, const size_t size,
         const size_t elem_size, void* tmp_buf) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    /* Pass 1: gather the bit rows back into byte order. */
    count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size);
    CHECK_ERR(count);

    /* Pass 2: bit shuffle per eight-element block; status goes to caller. */
    return bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size);
}
---|