// Copyright 2005 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "snappy.h"
#include "snappy-internal.h"
#include "snappy-sinksource.h"

#include <stdio.h>

#include <algorithm>
#include <string>
#include <vector>


namespace snappy {

// Any hash function will produce a valid compressed bitstream, but a good
// hash function reduces the number of collisions and thus yields better
// compression for compressible input, and more speed for incompressible
// input. Of course, it doesn't hurt if the hash function is reasonably fast
// either, as it gets called a lot.
static inline uint32 HashBytes(uint32 bytes, int shift) {
  uint32 kMul = 0x1e35a7bd;
  return (bytes * kMul) >> shift;
}
static inline uint32 Hash(const char* p, int shift) {
  return HashBytes(UNALIGNED_LOAD32(p), shift);
}

size_t MaxCompressedLength(size_t source_len) {
  // Compressed data can be defined as:
  //    compressed := item* literal*
  //    item       := literal* copy
  //
  // The trailing literal sequence has a space blowup of at most 62/60
  // since a literal of length 60 needs one tag byte + one extra byte
  // for length information.
  //
  // Item blowup is trickier to measure. Suppose the "copy" op copies
  // 4 bytes of data. Because of a special check in the encoding code,
  // we produce a 4-byte copy only if the offset is < 65536. Therefore
  // the copy op takes 3 bytes to encode, and this type of item leads
  // to at most the 62/60 blowup for representing literals.
  //
  // Suppose the "copy" op copies 5 bytes of data. If the offset is big
  // enough, it will take 5 bytes to encode the copy op. Therefore the
  // worst case here is a one-byte literal followed by a five-byte copy.
  // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
  //
  // This last factor dominates the blowup, so the final estimate is:
  return 32 + source_len + source_len/6;
}
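// As a concrete instance of the bound above: a 100,000-byte input yields
// 32 + 100000 + 100000/6 = 116,698 bytes, i.e. the input plus roughly one
// sixth of its size, plus a small constant pad.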

enum {
  LITERAL = 0,
  COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
  COPY_2_BYTE_OFFSET = 2,
  COPY_4_BYTE_OFFSET = 3
};
static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual offset.
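// A rough sketch of the tag-byte layouts implied by the emitters below
// (most significant bits first; the low two bits select the element type):
//
//   LITERAL:            [len-1, or 59+n when n extra length bytes follow] [00]
//   COPY_1_BYTE_OFFSET: [offset >> 8 : 3 bits] [len-4 : 3 bits]           [01]
//   COPY_2_BYTE_OFFSET: [len-1 : 6 bits]                                  [10]
//   COPY_4_BYTE_OFFSET: [len-1 : 6 bits]                                  [11]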

// Copy "len" bytes from "src" to "op", one byte at a time.  Used for
// handling COPY operations where the input and output regions may
// overlap.  For example, suppose:
//    src    == "ab"
//    op     == src + 2
//    len    == 20
// After IncrementalCopy(src, op, len), the result will have
// eleven copies of "ab"
//    ababababababababababab
// Note that this does not match the semantics of either memcpy()
// or memmove().
static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
  assert(len > 0);
  do {
    *op++ = *src++;
  } while (--len > 0);
}

// Equivalent to IncrementalCopy except that it can write up to ten extra
// bytes after the end of the copy, and that it is faster.
//
// The main part of this loop is a simple copy of eight bytes at a time until
// we've copied (at least) the requested number of bytes.  However, if op and
// src are less than eight bytes apart (indicating a repeating pattern of
// length < 8), we first need to expand the pattern in order to get the correct
// results.  For instance, if the buffer looks like this, with the eight-byte
// <src> and <op> patterns marked as intervals:
//
//    abxxxxxxxxxxxx
//    [------]           src
//      [------]         op
//
// a single eight-byte copy from <src> to <op> will repeat the pattern once,
// after which we can move <op> two bytes without moving <src>:
//
//    ababxxxxxxxxxx
//    [------]           src
//        [------]       op
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// op - src == 1 and len == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.

namespace {

const int kMaxIncrementCopyOverflow = 10;

inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
  while (op - src < 8) {
    UnalignedCopy64(src, op);
    len -= op - src;
    op += op - src;
  }
  while (len > 0) {
    UnalignedCopy64(src, op);
    src += 8;
    op += 8;
    len -= 8;
  }
}
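
// An illustrative trace of the pattern-expansion loop above, for
// op - src == 2 and len == 10:
//   1st pass: copy 8 bytes, so the 2-byte pattern now fills src[0..9];
//             len -= 2 -> 8, op += 2 -> op - src == 4.
//   2nd pass: copy 8 more bytes; len -= 4 -> 4, op += 4 -> op - src == 8.
//   The gap is now >= 8, so the plain 8-bytes-at-a-time loop finishes the
//   remaining 4 bytes (writing a few bytes past the requested end, which is
//   why callers must leave kMaxIncrementCopyOverflow bytes of slack).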

}  // namespace

static inline char* EmitLiteral(char* op,
                                const char* literal,
                                int len,
                                bool allow_fast_path) {
  int n = len - 1;      // Zero-length literals are disallowed
  if (n < 60) {
    // Fits in tag byte
    *op++ = LITERAL | (n << 2);

    // The vast majority of copies are below 16 bytes, for which a
    // call to memcpy is overkill. This fast path can sometimes
    // copy up to 15 bytes too much, but that is okay in the
    // main loop, since we have a bit to go on for both sides:
    //
    //   - The input will always have kInputMarginBytes = 15 extra
    //     available bytes, as long as we're in the main loop, and
    //     if not, allow_fast_path = false.
    //   - The output will always have 32 spare bytes (see
    //     MaxCompressedLength).
    if (allow_fast_path && len <= 16) {
      UnalignedCopy64(literal, op);
      UnalignedCopy64(literal + 8, op + 8);
      return op + len;
    }
  } else {
    // Encode in upcoming bytes
    char* base = op;
    int count = 0;
    op++;
    while (n > 0) {
      *op++ = n & 0xff;
      n >>= 8;
      count++;
    }
    assert(count >= 1);
    assert(count <= 4);
    *base = LITERAL | ((59+count) << 2);
  }
  memcpy(op, literal, len);
  return op + len;
}
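
// Example encodings produced by EmitLiteral:
//   len == 5:   one tag byte 0x10 (LITERAL | (4 << 2)), then the 5 bytes.
//   len == 100: n = 99 fits in one extra byte, so the tag byte is 0xf0
//               (LITERAL | (60 << 2)), followed by 0x63 (= 99), followed
//               by the 100 literal bytes.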

static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
  assert(len <= 64);
  assert(len >= 4);
  assert(offset < 65536);

  if ((len < 12) && (offset < 2048)) {
    size_t len_minus_4 = len - 4;
    assert(len_minus_4 < 8);            // Must fit in 3 bits
    *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
    *op++ = offset & 0xff;
  } else {
    *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
    LittleEndian::Store16(op, offset);
    op += 2;
  }
  return op;
}
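
// Example encodings produced by EmitCopyLessThan64:
//   len == 7, offset == 300:  1-byte-offset form.  Tag byte is
//     COPY_1_BYTE_OFFSET + ((7-4) << 2) + ((300 >> 8) << 5) = 0x2d,
//     followed by the low offset byte 0x2c (300 & 0xff).
//   len == 64, offset == 300: 2-byte-offset form.  Tag byte is
//     COPY_2_BYTE_OFFSET + ((64-1) << 2) = 0xfe, followed by the
//     little-endian offset bytes 0x2c 0x01.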

static inline char* EmitCopy(char* op, size_t offset, int len) {
  // Emit 64 byte copies but make sure to keep at least four bytes reserved
  while (len >= 68) {
    op = EmitCopyLessThan64(op, offset, 64);
    len -= 64;
  }

  // Emit an extra 60 byte copy if we have too much data to fit in one copy
  if (len > 64) {
    op = EmitCopyLessThan64(op, offset, 60);
    len -= 60;
  }

  // Emit remainder
  op = EmitCopyLessThan64(op, offset, len);
  return op;
}
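
// For example, a 150-byte copy is emitted as 64 + 64 + 22, while a 67-byte
// copy is emitted as 60 + 7: stopping the first loop at len >= 68 (and using
// a 60-byte copy when 64 < len <= 67) guarantees the final
// EmitCopyLessThan64 call always gets at least four bytes.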


bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
  uint32 v = 0;
  const char* limit = start + n;
  if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
    *result = v;
    return true;
  } else {
    return false;
  }
}
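
// The length prefix is a little-endian base-128 varint.  For example, an
// uncompressed length of 300 (binary 100101100) is stored as the two bytes
// 0xac 0x02: 0xac carries the low seven bits plus a continuation flag, and
// 0x02 carries the remaining bits.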

namespace internal {
uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
  // Use smaller hash table when input.size() is smaller, since we
  // fill the table, incurring O(hash table size) overhead for
  // compression, and if the input is short, we won't need that
  // many hash table entries anyway.
  assert(kMaxHashTableSize >= 256);
  size_t htsize = 256;
  while (htsize < kMaxHashTableSize && htsize < input_size) {
    htsize <<= 1;
  }

  uint16* table;
  if (htsize <= ARRAYSIZE(small_table_)) {
    table = small_table_;
  } else {
    if (large_table_ == NULL) {
      large_table_ = new uint16[kMaxHashTableSize];
    }
    table = large_table_;
  }

  *table_size = htsize;
  memset(table, 0, htsize * sizeof(*table));
  return table;
}
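
// For instance, a 5000-byte input gets the first power-of-two table size
// that is >= its length, i.e. 8192 entries (assuming kMaxHashTableSize
// allows it); tiny inputs stay at the 256-entry minimum.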
}  // end namespace internal

// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
// equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
// empirically found that overlapping loads such as
//  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
//
// We have different versions for 64- and 32-bit; ideally we would avoid the
// two functions and just inline the UNALIGNED_LOAD64 call into
// GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
// enough to avoid loading the value multiple times then.  For 64-bit, the load
// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
// done at GetUint32AtOffset() time.

#ifdef ARCH_K8

typedef uint64 EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return UNALIGNED_LOAD64(ptr);
}

static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
  assert(offset >= 0);
  assert(offset <= 4);
  return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
}

#else

typedef const char* EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return ptr;
}

static inline uint32 GetUint32AtOffset(const char* v, int offset) {
  assert(offset >= 0);
  assert(offset <= 4);
  return UNALIGNED_LOAD32(v + offset);
}

#endif

// Flat array compression that does not emit the "uncompressed length"
// prefix. Compresses "input" string to the "*op" buffer.
//
// REQUIRES: "input" is at most "kBlockSize" bytes long.
// REQUIRES: "op" points to an array of memory that is at least
// "MaxCompressedLength(input.size())" in size.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
//
// Returns an "end" pointer into "op" buffer.
// "end - op" is the compressed size of "input".
namespace internal {
char* CompressFragment(const char* input,
                       size_t input_size,
                       char* op,
                       uint16* table,
                       const int table_size) {
  // "ip" is the input pointer, and "op" is the output pointer.
  const char* ip = input;
  assert(input_size <= kBlockSize);
  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
  const int shift = 32 - Bits::Log2Floor(table_size);
  assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
  const char* ip_end = input + input_size;
  const char* base_ip = ip;
  // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
  // [next_emit, ip_end) after the main loop.
  const char* next_emit = ip;

  const size_t kInputMarginBytes = 15;
  if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
    const char* ip_limit = input + input_size - kInputMarginBytes;

    for (uint32 next_hash = Hash(++ip, shift); ; ) {
      assert(next_emit < ip);
      // The body of this loop calls EmitLiteral once and then EmitCopy one or
      // more times.  (The exception is that when we're close to exhausting
      // the input we goto emit_remainder.)
      //
      // In the first iteration of this loop we're just starting, so
      // there's nothing to copy, so calling EmitLiteral once is
      // necessary.  And we only start a new iteration when the
      // current iteration has determined that a call to EmitLiteral will
      // precede the next call to EmitCopy (if any).
      //
      // Step 1: Scan forward in the input looking for a 4-byte-long match.
      // If we get close to exhausting the input then goto emit_remainder.
      //
      // Heuristic match skipping: If 32 bytes are scanned with no matches
      // found, start looking only at every other byte. If 32 more bytes are
      // scanned, look at every third byte, etc.. When a match is found,
      // immediately go back to looking at every byte. This is a small loss
      // (~5% performance, ~0.1% density) for compressible data due to more
      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
      // win since the compressor quickly "realizes" the data is incompressible
      // and doesn't bother looking for matches everywhere.
      //
      // The "skip" variable keeps track of how many bytes there are since the
      // last match; dividing it by 32 (i.e. right-shifting by five) gives the
      // number of bytes to move ahead for each iteration.
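      //
      // Concretely: "skip" starts at 32 and bytes_between_hash_lookups is
      // skip++ >> 5, so the first 32 probes advance one byte each, the next
      // 32 advance two bytes each, then three, and so on.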
      uint32 skip = 32;

      const char* next_ip = ip;
      const char* candidate;
      do {
        ip = next_ip;
        uint32 hash = next_hash;
        assert(hash == Hash(ip, shift));
        uint32 bytes_between_hash_lookups = skip++ >> 5;
        next_ip = ip + bytes_between_hash_lookups;
        if (PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = base_ip + table[hash];
        assert(candidate >= base_ip);
        assert(candidate < ip);

        table[hash] = ip - base_ip;
      } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
                            UNALIGNED_LOAD32(candidate)));

      // Step 2: A 4-byte match has been found.  We'll later see if more
      // than 4 bytes match.  But, prior to the match, input
      // bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
      assert(next_emit + 16 <= ip_end);
      op = EmitLiteral(op, next_emit, ip - next_emit, true);

      // Step 3: Call EmitCopy, and then see if another EmitCopy could
      // be our next move.  Repeat until we find no match for the
      // input immediately after what was consumed by the last EmitCopy call.
      //
      // If we exit this loop normally then we need to call EmitLiteral next,
      // though we don't yet know how big the literal will be.  We handle that
      // by proceeding to the next iteration of the main loop.  We also can exit
      // this loop via goto if we get close to exhausting the input.
      EightBytesReference input_bytes;
      uint32 candidate_bytes = 0;

      do {
        // We have a 4-byte match at ip, and no need to emit any
        // "literal bytes" prior to ip.
        const char* base = ip;
        int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
        ip += matched;
        size_t offset = base - candidate;
        assert(0 == memcmp(base, candidate, matched));
        op = EmitCopy(op, offset, matched);
        // We could immediately start working at ip now, but to improve
        // compression we first update table[Hash(ip - 1, ...)].
        const char* insert_tail = ip - 1;
        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        input_bytes = GetEightBytesAt(insert_tail);
        uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
        table[prev_hash] = ip - base_ip - 1;
        uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
        candidate = base_ip + table[cur_hash];
        candidate_bytes = UNALIGNED_LOAD32(candidate);
        table[cur_hash] = ip - base_ip;
      } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);

      next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
      ++ip;
    }
  }

 emit_remainder:
  // Emit the remaining bytes as a literal
  if (next_emit < ip_end) {
    op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
  }

  return op;
}
}  // end namespace internal

// Signature of output types needed by decompression code.
// The decompression code is templatized on a type that obeys this
// signature so that we do not pay virtual function call overhead in
// the middle of a tight decompression loop.
//
// class DecompressionWriter {
//  public:
//   // Called before decompression
//   void SetExpectedLength(size_t length);
//
//   // Called after decompression
//   bool CheckLength() const;
//
//   // Called repeatedly during decompression
//   bool Append(const char* ip, size_t length);
//   bool AppendFromSelf(uint32 offset, size_t length);
//
//   // The rules for how TryFastAppend differs from Append are somewhat
//   // convoluted:
//   //
//   //  - TryFastAppend is allowed to decline (return false) at any
//   //    time, for any reason -- just "return false" would be
//   //    a perfectly legal implementation of TryFastAppend.
//   //    The intention is for TryFastAppend to allow a fast path
//   //    in the common case of a small append.
//   //  - TryFastAppend is allowed to read up to <available> bytes
//   //    from the input buffer, whereas Append is allowed to read
//   //    <length>. However, if it returns true, it must leave
//   //    at least five (kMaximumTagLength) bytes in the input buffer
//   //    afterwards, so that there is always enough space to read the
//   //    next tag without checking for a refill.
//   //  - TryFastAppend must always decline (return false)
//   //    if <length> is 61 or more, as in this case the literal length is not
//   //    decoded fully. In practice, this should not be a big problem,
//   //    as it is unlikely that one would implement a fast path accepting
//   //    this much data.
//   //
//   bool TryFastAppend(const char* ip, size_t available, size_t length);
// };

// -----------------------------------------------------------------------
// Lookup table for decompression code.  Generated by ComputeTable() below.
// -----------------------------------------------------------------------

// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
static const uint32 wordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
};

// Data stored per entry in lookup table:
//      Range   Bits-used       Description
//      ------------------------------------
//      1..64   0..7            Literal/copy length encoded in opcode byte
//      0..7    8..10           Copy offset encoded in opcode byte / 256
//      0..4    11..13          Extra bytes after opcode
//
// We use eight bits for the length even though 7 would have sufficed,
// for efficiency reasons:
//      (1) Extracting a byte is faster than a bit-field
//      (2) It properly aligns copy offset so we do not need a <<8
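//
// For example, char_table[0x04] (a LITERAL tag with len-1 == 1) is 0x0002:
// length 2, no extra bytes, no offset bits.  char_table[0x01] (a
// COPY_1_BYTE_OFFSET tag with len-4 == 0 and offset>>8 == 0) is 0x0804:
// length 4 with one extra offset byte following the opcode.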
static const uint16 char_table[256] = {
  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
  0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
};

// In debug mode, allow optional computation of the table at startup.
// Also, check that the decompression table is correct.
#ifndef NDEBUG
DEFINE_bool(snappy_dump_decompression_table, false,
            "If true, we print the decompression table at startup.");

static uint16 MakeEntry(unsigned int extra,
                        unsigned int len,
                        unsigned int copy_offset) {
  // Check that all of the fields fit within the allocated space
  assert(extra       == (extra & 0x7));          // At most 3 bits
  assert(copy_offset == (copy_offset & 0x7));    // At most 3 bits
  assert(len         == (len & 0x7f));           // At most 7 bits
  return len | (copy_offset << 8) | (extra << 11);
}

static void ComputeTable() {
  uint16 dst[256];

  // Place invalid entries in all places to detect missing initialization
  int assigned = 0;
  for (int i = 0; i < 256; i++) {
    dst[i] = 0xffff;
  }

  // Small LITERAL entries.  We store (len-1) in the top 6 bits.
  for (unsigned int len = 1; len <= 60; len++) {
    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
    assigned++;
  }

  // Large LITERAL entries.  We use 60..63 in the high 6 bits to
  // encode the number of bytes of length info that follow the opcode.
  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
    // We set the length field in the lookup table to 1 because extra
    // bytes encode len-1.
    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
    assigned++;
  }

  // COPY_1_BYTE_OFFSET.
  //
  // The tag byte in the compressed data stores len-4 in 3 bits, and
  // offset/256 in 5 bits.  offset%256 is stored in the next byte.
  //
  // This format is used for length in range [4..11] and offset in
  // range [0..2047]
  for (unsigned int len = 4; len < 12; len++) {
    for (unsigned int offset = 0; offset < 2048; offset += 256) {
      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
        MakeEntry(1, len, offset>>8);
      assigned++;
    }
  }

  // COPY_2_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
    assigned++;
  }

  // COPY_4_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
    assigned++;
  }

  // Check that each entry was initialized exactly once.
  if (assigned != 256) {
    fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
    abort();
  }
  for (int i = 0; i < 256; i++) {
    if (dst[i] == 0xffff) {
      fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
      abort();
    }
  }

  if (FLAGS_snappy_dump_decompression_table) {
    printf("static const uint16 char_table[256] = {\n  ");
    for (int i = 0; i < 256; i++) {
      printf("0x%04x%s",
             dst[i],
             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
    }
    printf("};\n");
  }

  // Check that computed table matched recorded table
  for (int i = 0; i < 256; i++) {
    if (dst[i] != char_table[i]) {
      fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
              i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
      abort();
    }
  }
}
#endif /* !NDEBUG */

// Helper class for decompression
class SnappyDecompressor {
 private:
  Source*       reader_;         // Underlying source of bytes to decompress
  const char*   ip_;             // Points to next buffered byte
  const char*   ip_limit_;       // Points just past buffered bytes
  uint32        peeked_;         // Bytes peeked from reader (need to skip)
  bool          eof_;            // Hit end of input without an error?
  char          scratch_[kMaximumTagLength];  // See RefillTag().

  // Ensure that all of the tag metadata for the next tag is available
  // in [ip_..ip_limit_-1].  Also ensures that [ip,ip+4] is readable even
  // if (ip_limit_ - ip_ < 5).
  //
  // Returns true on success, false on error or end of input.
  bool RefillTag();

 public:
  explicit SnappyDecompressor(Source* reader)
      : reader_(reader),
        ip_(NULL),
        ip_limit_(NULL),
        peeked_(0),
        eof_(false) {
  }

  ~SnappyDecompressor() {
    // Advance past any bytes we peeked at from the reader
    reader_->Skip(peeked_);
  }

  // Returns true iff we have hit the end of the input without an error.
  bool eof() const {
    return eof_;
  }

  // Read the uncompressed length stored at the start of the compressed data.
  // On success, stores the length in *result and returns true.
  // On failure, returns false.
  bool ReadUncompressedLength(uint32* result) {
    assert(ip_ == NULL);       // Must not have read anything yet
    // Length is encoded in 1..5 bytes
    *result = 0;
    uint32 shift = 0;
    while (true) {
      if (shift >= 32) return false;
      size_t n;
      const char* ip = reader_->Peek(&n);
      if (n == 0) return false;
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
      reader_->Skip(1);
      *result |= static_cast<uint32>(c & 0x7f) << shift;
      if (c < 128) {
        break;
      }
      shift += 7;
    }
    return true;
  }

  // Process all of the items found in the input, writing them via "writer".
  // Stops when the input is exhausted or the writer reports an error.
  template <class Writer>
  void DecompressAllTags(Writer* writer) {
    const char* ip = ip_;

    // We could have put this refill fragment only at the beginning of the loop.
    // However, duplicating it at the end of each branch gives the compiler more
    // scope to optimize the <ip_limit_ - ip> expression based on the local
    // context, which overall increases speed.
    #define MAYBE_REFILL() \
        if (ip_limit_ - ip < kMaximumTagLength) { \
          ip_ = ip; \
          if (!RefillTag()) return; \
          ip = ip_; \
        }

    MAYBE_REFILL();
    for ( ;; ) {
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

      if ((c & 0x3) == LITERAL) {
        size_t literal_length = (c >> 2) + 1u;
        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
          assert(literal_length < 61);
          ip += literal_length;
          // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
          // will not return true unless there's already at least five spare
          // bytes in addition to the literal.
          continue;
        }
        if (PREDICT_FALSE(literal_length >= 61)) {
          // Long literal.
          const size_t literal_length_length = literal_length - 60;
          literal_length =
              (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
          ip += literal_length_length;
        }

        size_t avail = ip_limit_ - ip;
        while (avail < literal_length) {
          if (!writer->Append(ip, avail)) return;
          literal_length -= avail;
          reader_->Skip(peeked_);
          size_t n;
          ip = reader_->Peek(&n);
          avail = n;
          peeked_ = avail;
          if (avail == 0) return;  // Premature end of input
          ip_limit_ = ip + avail;
        }
        if (!writer->Append(ip, literal_length)) {
          return;
        }
        ip += literal_length;
        MAYBE_REFILL();
      } else {
        const uint32 entry = char_table[c];
        const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
        const uint32 length = entry & 0xff;
        ip += entry >> 11;

        // copy_offset/256 is encoded in bits 8..10.  By just fetching
        // those bits, we get copy_offset (since the bit-field starts at
        // bit 8).
        const uint32 copy_offset = entry & 0x700;
        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
          return;
        }
        MAYBE_REFILL();
      }
    }

#undef MAYBE_REFILL
  }
};

bool SnappyDecompressor::RefillTag() {
  const char* ip = ip_;
  if (ip == ip_limit_) {
    // Fetch a new fragment from the reader
    reader_->Skip(peeked_);   // All peeked bytes are used up
    size_t n;
    ip = reader_->Peek(&n);
    peeked_ = n;
    if (n == 0) {
      eof_ = true;
      return false;
    }
    ip_limit_ = ip + n;
  }

  // Read the tag character
  assert(ip < ip_limit_);
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
  const uint32 entry = char_table[c];
  const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
  assert(needed <= sizeof(scratch_));

  // Read more bytes from reader if needed
  uint32 nbuf = ip_limit_ - ip;
  if (nbuf < needed) {
    // Stitch together bytes from ip and reader to form the word
    // contents.  We store the needed bytes in "scratch_".  They
    // will be consumed immediately by the caller since we do not
    // read more than we need.
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    while (nbuf < needed) {
      size_t length;
      const char* src = reader_->Peek(&length);
      if (length == 0) return false;
      uint32 to_add = min<uint32>(needed - nbuf, length);
      memcpy(scratch_ + nbuf, src, to_add);
      nbuf += to_add;
      reader_->Skip(to_add);
    }
    assert(nbuf == needed);
    ip_ = scratch_;
    ip_limit_ = scratch_ + needed;
  } else if (nbuf < kMaximumTagLength) {
    // Have enough bytes, but move into scratch_ so that we do not
    // read past end of input
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    ip_ = scratch_;
    ip_limit_ = scratch_ + nbuf;
  } else {
    // Pass pointer to buffer returned by reader_.
    ip_ = ip;
  }
  return true;
}
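
// For example, if the next tag byte is 0xfe (a COPY_2_BYTE_OFFSET with
// length 64), char_table[0xfe] >> 11 == 2, so RefillTag arranges for at
// least needed == 3 contiguous bytes (the tag plus two offset bytes) before
// DecompressAllTags reads them.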

template <typename Writer>
static bool InternalUncompress(Source* r, Writer* writer) {
  // Read the uncompressed length from the front of the compressed input
  SnappyDecompressor decompressor(r);
  uint32 uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
  return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
}

template <typename Writer>
static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                      Writer* writer,
                                      uint32 uncompressed_len) {
  writer->SetExpectedLength(uncompressed_len);

  // Process the entire input
  decompressor->DecompressAllTags(writer);
  return (decompressor->eof() && writer->CheckLength());
}

bool GetUncompressedLength(Source* source, uint32* result) {
  SnappyDecompressor decompressor(source);
  return decompressor.ReadUncompressedLength(result);
}

size_t Compress(Source* reader, Sink* writer) {
  size_t written = 0;
  size_t N = reader->Available();
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
  writer->Append(ulength, p-ulength);
  written += (p - ulength);

  internal::WorkingMemory wmem;
  char* scratch = NULL;
  char* scratch_output = NULL;

  while (N > 0) {
    // Get next block to compress (without copying if possible)
    size_t fragment_size;
    const char* fragment = reader->Peek(&fragment_size);
    assert(fragment_size != 0);  // premature end of input
    const size_t num_to_read = min(N, kBlockSize);
    size_t bytes_read = fragment_size;

    size_t pending_advance = 0;
    if (bytes_read >= num_to_read) {
      // Buffer returned by reader is large enough
      pending_advance = num_to_read;
      fragment_size = num_to_read;
    } else {
      // Read into scratch buffer
      if (scratch == NULL) {
        // If this is the last iteration, we want to allocate N bytes
        // of space, otherwise the max possible kBlockSize space.
        // num_to_read contains exactly the correct value
        scratch = new char[num_to_read];
      }
      memcpy(scratch, fragment, bytes_read);
      reader->Skip(bytes_read);

      while (bytes_read < num_to_read) {
        fragment = reader->Peek(&fragment_size);
        size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
        memcpy(scratch + bytes_read, fragment, n);
        bytes_read += n;
        reader->Skip(n);
      }
      assert(bytes_read == num_to_read);
      fragment = scratch;
      fragment_size = num_to_read;
    }
    assert(fragment_size == num_to_read);

    // Get encoding table for compression
    int table_size;
    uint16* table = wmem.GetHashTable(num_to_read, &table_size);

    // Compress input_fragment and append to dest
    const int max_output = MaxCompressedLength(num_to_read);

    // Need a scratch buffer for the output, in case the byte sink doesn't
    // have room for us directly.
    if (scratch_output == NULL) {
      scratch_output = new char[max_output];
    } else {
      // Since we encode kBlockSize regions followed by a region
      // which is <= kBlockSize in length, a previously allocated
      // scratch_output[] region is big enough for this iteration.
    }
    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
    char* end = internal::CompressFragment(fragment, fragment_size,
                                           dest, table, table_size);
    writer->Append(dest, end - dest);
    written += (end - dest);

    N -= num_to_read;
    reader->Skip(pending_advance);
  }

  delete[] scratch;
  delete[] scratch_output;

  return written;
}

// -----------------------------------------------------------------------
// IOVec interfaces
// -----------------------------------------------------------------------

// A type that writes to an iovec.
// Note that this is not a "ByteSink", but a type that matches the
// Writer template argument to SnappyDecompressor::DecompressAllTags().
class SnappyIOVecWriter {
 private:
  const struct iovec* output_iov_;
  const size_t output_iov_count_;

  // We are currently writing into output_iov_[curr_iov_index_].
  int curr_iov_index_;

  // Bytes written to output_iov_[curr_iov_index_] so far.
  size_t curr_iov_written_;

  // Total bytes decompressed into output_iov_ so far.
  size_t total_written_;

  // Maximum number of bytes that will be decompressed into output_iov_.
  size_t output_limit_;

  inline char* GetIOVecPointer(int index, size_t offset) {
    return reinterpret_cast<char*>(output_iov_[index].iov_base) +
        offset;
  }

 public:
  // Does not take ownership of iov. iov must be valid during the
  // entire lifetime of the SnappyIOVecWriter.
  inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
      : output_iov_(iov),
        output_iov_count_(iov_count),
        curr_iov_index_(0),
        curr_iov_written_(0),
        total_written_(0),
        output_limit_(-1) {
  }

  inline void SetExpectedLength(size_t len) {
    output_limit_ = len;
  }

  inline bool CheckLength() const {
    return total_written_ == output_limit_;
  }

  inline bool Append(const char* ip, size_t len) {
    if (total_written_ + len > output_limit_) {
      return false;
    }

    while (len > 0) {
      assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
      if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
        // This iovec is full. Go to the next one.
        if (curr_iov_index_ + 1 >= output_iov_count_) {
          return false;
        }
        curr_iov_written_ = 0;
        ++curr_iov_index_;
      }

      const size_t to_write = std::min(
          len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_);
      memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_),
             ip,
             to_write);
      curr_iov_written_ += to_write;
      total_written_ += to_write;
      ip += to_write;
      len -= to_write;
    }

    return true;
  }

  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
    const size_t space_left = output_limit_ - total_written_;
    if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
        output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
      // Fast path, used for the majority (about 95%) of invocations.
      char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
      UnalignedCopy64(ip, ptr);
      UnalignedCopy64(ip + 8, ptr + 8);
      curr_iov_written_ += len;
      total_written_ += len;
      return true;
    }

    return false;
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
    if (offset > total_written_ || offset == 0) {
      return false;
    }
    const size_t space_left = output_limit_ - total_written_;
    if (len > space_left) {
      return false;
    }

    // Locate the iovec from which we need to start the copy.
    int from_iov_index = curr_iov_index_;
    size_t from_iov_offset = curr_iov_written_;
    while (offset > 0) {
      if (from_iov_offset >= offset) {
        from_iov_offset -= offset;
        break;
      }

      offset -= from_iov_offset;
      --from_iov_index;
      assert(from_iov_index >= 0);
      from_iov_offset = output_iov_[from_iov_index].iov_len;
    }

    // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
    // the current iovec.
    while (len > 0) {
      assert(from_iov_index <= curr_iov_index_);
      if (from_iov_index != curr_iov_index_) {
        const size_t to_copy = std::min(
            output_iov_[from_iov_index].iov_len - from_iov_offset,
            len);
        Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
        len -= to_copy;
        if (len > 0) {
          ++from_iov_index;
          from_iov_offset = 0;
        }
      } else {
        assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
        size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
                                      curr_iov_written_,
                                  len);
        if (to_copy == 0) {
          // This iovec is full. Go to the next one.
          if (curr_iov_index_ + 1 >= output_iov_count_) {
            return false;
          }
          ++curr_iov_index_;
          curr_iov_written_ = 0;
          continue;
        }
        if (to_copy > len) {
          to_copy = len;
        }
        IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
                        GetIOVecPointer(curr_iov_index_, curr_iov_written_),
                        to_copy);
        curr_iov_written_ += to_copy;
        from_iov_offset += to_copy;
        total_written_ += to_copy;
        len -= to_copy;
      }
    }

    return true;
  }

};

bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
                          const struct iovec* iov, size_t iov_cnt) {
  ByteArraySource reader(compressed, compressed_length);
  return RawUncompressToIOVec(&reader, iov, iov_cnt);
}

bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
                          size_t iov_cnt) {
  SnappyIOVecWriter output(iov, iov_cnt);
  return InternalUncompress(compressed, &output);
}

// -----------------------------------------------------------------------
// Flat array interfaces
// -----------------------------------------------------------------------

// A type that writes to a flat array.
// Note that this is not a "ByteSink", but a type that matches the
// Writer template argument to SnappyDecompressor::DecompressAllTags().
class SnappyArrayWriter {
 private:
  char* base_;
  char* op_;
  char* op_limit_;

 public:
  inline explicit SnappyArrayWriter(char* dst)
      : base_(dst),
        op_(dst) {
  }

  inline void SetExpectedLength(size_t len) {
    op_limit_ = op_ + len;
  }

  inline bool CheckLength() const {
    return op_ == op_limit_;
  }

  inline bool Append(const char* ip, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (space_left < len) {
      return false;
    }
    memcpy(op, ip, len);
    op_ = op + len;
    return true;
  }

  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
      // Fast path, used for the majority (about 95%) of invocations.
      UnalignedCopy64(ip, op);
      UnalignedCopy64(ip + 8, op + 8);
      op_ = op + len;
      return true;
    } else {
      return false;
    }
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;

    // Check if we try to append from before the start of the buffer.
    // Normally this would just be a check for "produced < offset",
    // but "produced <= offset - 1u" is equivalent for every case
    // except the one where offset==0, where the right side will wrap around
    // to a very big number. This is convenient, as offset==0 is another
    // invalid case that we also want to catch, so that we do not go
    // into an infinite loop.
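    // For instance, with offset == 0 the subtraction wraps to SIZE_MAX, so
    // "produced <= offset - 1u" is trivially true and the copy is rejected,
    // just like a copy whose offset reaches before the produced output.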
    assert(op >= base_);
    size_t produced = op - base_;
    if (produced <= offset - 1u) {
      return false;
    }
    if (len <= 16 && offset >= 8 && space_left >= 16) {
      // Fast path, used for the majority (70-80%) of dynamic invocations.
      UnalignedCopy64(op - offset, op);
      UnalignedCopy64(op - offset + 8, op + 8);
    } else {
      if (space_left >= len + kMaxIncrementCopyOverflow) {
        IncrementalCopyFastPath(op - offset, op, len);
      } else {
        if (space_left < len) {
          return false;
        }
        IncrementalCopy(op - offset, op, len);
      }
    }

    op_ = op + len;
    return true;
  }
};

bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
  ByteArraySource reader(compressed, n);
  return RawUncompress(&reader, uncompressed);
}

bool RawUncompress(Source* compressed, char* uncompressed) {
  SnappyArrayWriter output(uncompressed);
  return InternalUncompress(compressed, &output);
}

bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
  size_t ulength;
  if (!GetUncompressedLength(compressed, n, &ulength)) {
    return false;
  }
  // On 32-bit builds: max_size() < kuint32max.  Check for that instead
  // of crashing (e.g., consider externally specified compressed data).
  if (ulength > uncompressed->max_size()) {
    return false;
  }
  STLStringResizeUninitialized(uncompressed, ulength);
  return RawUncompress(compressed, n, string_as_array(uncompressed));
}


// A Writer that drops everything on the floor and just does validation
class SnappyDecompressionValidator {
 private:
  size_t expected_;
  size_t produced_;

 public:
  inline SnappyDecompressionValidator() : produced_(0) { }
  inline void SetExpectedLength(size_t len) {
    expected_ = len;
  }
  inline bool CheckLength() const {
    return expected_ == produced_;
  }
  inline bool Append(const char* ip, size_t len) {
    produced_ += len;
    return produced_ <= expected_;
  }
  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
    return false;
  }
  inline bool AppendFromSelf(size_t offset, size_t len) {
    // See SnappyArrayWriter::AppendFromSelf for an explanation of
    // the "offset - 1u" trick.
    if (produced_ <= offset - 1u) return false;
    produced_ += len;
    return produced_ <= expected_;
  }
};

bool IsValidCompressedBuffer(const char* compressed, size_t n) {
  ByteArraySource reader(compressed, n);
  SnappyDecompressionValidator writer;
  return InternalUncompress(&reader, &writer);
}

void RawCompress(const char* input,
                 size_t input_length,
                 char* compressed,
                 size_t* compressed_length) {
  ByteArraySource reader(input, input_length);
  UncheckedByteArraySink writer(compressed);
  Compress(&reader, &writer);

  // Compute how many bytes were added
  *compressed_length = (writer.CurrentDestination() - compressed);
}

size_t Compress(const char* input, size_t input_length, string* compressed) {
  // Pre-grow the buffer to the max length of the compressed output
  compressed->resize(MaxCompressedLength(input_length));

  size_t compressed_length;
  RawCompress(input, input_length, string_as_array(compressed),
              &compressed_length);
  compressed->resize(compressed_length);
  return compressed_length;
}


}  // end namespace snappy