source: thirdparty/blosc/internal-complibs/zstd-0.7.4/legacy/zstd_v01.c @ 8ebc79b

Revision 8ebc79b, 71.7 KB checked in by Hal Finkel <hfinkel@…>, 8 years ago (diff)

Add the other internal compression libraries from blocs

  • Property mode set to 100644
Line 
1/* ******************************************************************
2   ZSTD_v01
3   Zstandard decoder, compatible with v0.1.x format
4   Copyright (C) 2013-2015, Yann Collet.
5
6   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are
10   met:
11
12       * Redistributions of source code must retain the above copyright
13   notice, this list of conditions and the following disclaimer.
14       * Redistributions in binary form must reproduce the above
15   copyright notice, this list of conditions and the following disclaimer
16   in the documentation and/or other materials provided with the
17   distribution.
18
19   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31   You can contact the author at :
32   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33   - Public forum : https://groups.google.com/forum/#!forum/lz4c
34****************************************************************** */
35
36/******************************************
37*  Includes
38******************************************/
39#include <stddef.h>    /* size_t, ptrdiff_t */
40#include "zstd_v01.h"
41
42
43/******************************************
44*  Static allocation
45******************************************/
46/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
47#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
48
49/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
50#define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
51#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
52        unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
53
54
55/******************************************
56*  Error Management
57******************************************/
58#define FSE_LIST_ERRORS(ITEM) \
59        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
60        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
61        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
62        ITEM(FSE_ERROR_corruptionDetected) \
63        ITEM(FSE_ERROR_maxCode)
64
65#define FSE_GENERATE_ENUM(ENUM) ENUM,
66typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
67
68
69/******************************************
70*  FSE symbol compression API
71******************************************/
72/*
73   This API consists of small unitary functions, which highly benefit from being inlined.
74   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
75   Visual seems to do it automatically.
76   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
77   If none of these solutions is applicable, include "fse.c" directly.
78*/
79
80typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
81typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
82
83typedef struct
84{
85    size_t bitContainer;
86    int    bitPos;
87    char*  startPtr;
88    char*  ptr;
89    char*  endPtr;
90} FSE_CStream_t;
91
92typedef struct
93{
94    ptrdiff_t   value;
95    const void* stateTable;
96    const void* symbolTT;
97    unsigned    stateLog;
98} FSE_CState_t;
99
100typedef struct
101{
102    size_t   bitContainer;
103    unsigned bitsConsumed;
104    const char* ptr;
105    const char* start;
106} FSE_DStream_t;
107
108typedef struct
109{
110    size_t      state;
111    const void* table;   /* precise table may vary, depending on U16 */
112} FSE_DState_t;
113
114typedef enum { FSE_DStream_unfinished = 0,
115               FSE_DStream_endOfBuffer = 1,
116               FSE_DStream_completed = 2,
117               FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
118               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
119
120
121/****************************************************************
122*  Tuning parameters
123****************************************************************/
124/* MEMORY_USAGE :
125*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
126*  Increasing memory usage improves compression ratio
127*  Reduced memory usage can improve speed, due to cache effect
128*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
129#define FSE_MAX_MEMORY_USAGE 14
130#define FSE_DEFAULT_MEMORY_USAGE 13
131
132/* FSE_MAX_SYMBOL_VALUE :
133*  Maximum symbol value authorized.
134*  Required for proper stack allocation */
135#define FSE_MAX_SYMBOL_VALUE 255
136
137
138/****************************************************************
139*  template functions type & suffix
140****************************************************************/
141#define FSE_FUNCTION_TYPE BYTE
142#define FSE_FUNCTION_EXTENSION
143
144
145/****************************************************************
146*  Byte symbol type
147****************************************************************/
148typedef struct
149{
150    unsigned short newState;
151    unsigned char  symbol;
152    unsigned char  nbBits;
153} FSE_decode_t;   /* size == U32 */
154
155
156
157/****************************************************************
158*  Compiler specifics
159****************************************************************/
160#ifdef _MSC_VER    /* Visual Studio */
161#  define FORCE_INLINE static __forceinline
162#  include <intrin.h>                    /* For Visual 2005 */
163#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
164#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
165#else
166#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
167#  ifdef __GNUC__
168#    define FORCE_INLINE static inline __attribute__((always_inline))
169#  else
170#    define FORCE_INLINE static inline
171#  endif
172#endif
173
174
175/****************************************************************
176*  Includes
177****************************************************************/
178#include <stdlib.h>     /* malloc, free, qsort */
179#include <string.h>     /* memcpy, memset */
180#include <stdio.h>      /* printf (debug) */
181
182
183#ifndef MEM_ACCESS_MODULE
184#define MEM_ACCESS_MODULE
185/****************************************************************
186*  Basic Types
187*****************************************************************/
188#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
189# include <stdint.h>
190typedef  uint8_t BYTE;
191typedef uint16_t U16;
192typedef  int16_t S16;
193typedef uint32_t U32;
194typedef  int32_t S32;
195typedef uint64_t U64;
196typedef  int64_t S64;
197#else
198typedef unsigned char       BYTE;
199typedef unsigned short      U16;
200typedef   signed short      S16;
201typedef unsigned int        U32;
202typedef   signed int        S32;
203typedef unsigned long long  U64;
204typedef   signed long long  S64;
205#endif
206
207#endif   /* MEM_ACCESS_MODULE */
208
209/****************************************************************
210*  Memory I/O
211*****************************************************************/
212/* FSE_FORCE_MEMORY_ACCESS
213 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
214 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
215 * The below switch allow to select different access method for improved performance.
216 * Method 0 (default) : use `memcpy()`. Safe and portable.
217 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
218 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
219 * Method 2 : direct access. This method is portable but violate C standard.
220 *            It can generate buggy code on targets generating assembly depending on alignment.
221 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
222 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
223 * Prefer these methods in priority order (0 > 1 > 2)
224 */
225#ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
226#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
227#    define FSE_FORCE_MEMORY_ACCESS 2
228#  elif defined(__INTEL_COMPILER) || \
229  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
230#    define FSE_FORCE_MEMORY_ACCESS 1
231#  endif
232#endif
233
234
235static unsigned FSE_32bits(void)
236{
237    return sizeof(void*)==4;
238}
239
240static unsigned FSE_isLittleEndian(void)
241{
242    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
243    return one.c[0];
244}
245
246#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
247
248static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
249static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
250static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
251
252#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
253
254/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
255/* currently only defined for gcc and icc */
256typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
257
258static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
259static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
260static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
261
262#else
263
264static U16 FSE_read16(const void* memPtr)
265{
266    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
267}
268
269static U32 FSE_read32(const void* memPtr)
270{
271    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
272}
273
274static U64 FSE_read64(const void* memPtr)
275{
276    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
277}
278
279#endif // FSE_FORCE_MEMORY_ACCESS
280
281static U16 FSE_readLE16(const void* memPtr)
282{
283    if (FSE_isLittleEndian())
284        return FSE_read16(memPtr);
285    else
286    {
287        const BYTE* p = (const BYTE*)memPtr;
288        return (U16)(p[0] + (p[1]<<8));
289    }
290}
291
292static U32 FSE_readLE32(const void* memPtr)
293{
294    if (FSE_isLittleEndian())
295        return FSE_read32(memPtr);
296    else
297    {
298        const BYTE* p = (const BYTE*)memPtr;
299        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
300    }
301}
302
303
304static U64 FSE_readLE64(const void* memPtr)
305{
306    if (FSE_isLittleEndian())
307        return FSE_read64(memPtr);
308    else
309    {
310        const BYTE* p = (const BYTE*)memPtr;
311        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
312                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
313    }
314}
315
316static size_t FSE_readLEST(const void* memPtr)
317{
318    if (FSE_32bits())
319        return (size_t)FSE_readLE32(memPtr);
320    else
321        return (size_t)FSE_readLE64(memPtr);
322}
323
324
325
326/****************************************************************
327*  Constants
328*****************************************************************/
329#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
330#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
331#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
332#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
333#define FSE_MIN_TABLELOG 5
334
335#define FSE_TABLELOG_ABSOLUTE_MAX 15
336#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
337#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
338#endif
339
340
341/****************************************************************
342*  Error Management
343****************************************************************/
344#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
345
346
347/****************************************************************
348*  Complex types
349****************************************************************/
350typedef struct
351{
352    int deltaFindState;
353    U32 deltaNbBits;
354} FSE_symbolCompressionTransform; /* total 8 bytes */
355
356typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
357
358/****************************************************************
359*  Internal functions
360****************************************************************/
361FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
362{
363#   if defined(_MSC_VER)   /* Visual */
364    unsigned long r;
365    _BitScanReverse ( &r, val );
366    return (unsigned) r;
367#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
368    return 31 - __builtin_clz (val);
369#   else   /* Software version */
370    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
371    U32 v = val;
372    unsigned r;
373    v |= v >> 1;
374    v |= v >> 2;
375    v |= v >> 4;
376    v |= v >> 8;
377    v |= v >> 16;
378    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
379    return r;
380#   endif
381}
382
383
384/****************************************************************
385*  Templates
386****************************************************************/
387/*
388  designed to be included
389  for type-specific functions (template emulation in C)
390  Objective is to write these functions only once, for improved maintenance
391*/
392
393/* safety checks */
394#ifndef FSE_FUNCTION_EXTENSION
395#  error "FSE_FUNCTION_EXTENSION must be defined"
396#endif
397#ifndef FSE_FUNCTION_TYPE
398#  error "FSE_FUNCTION_TYPE must be defined"
399#endif
400
401/* Function names */
402#define FSE_CAT(X,Y) X##Y
403#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
404#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
405
406
407
408static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
409
410#define FSE_DECODE_TYPE FSE_decode_t
411
412
413typedef struct {
414    U16 tableLog;
415    U16 fastMode;
416} FSE_DTableHeader;   /* sizeof U32 */
417
418static size_t FSE_buildDTable
419(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
420{
421    void* ptr = dt;
422    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
423    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
424    const U32 tableSize = 1 << tableLog;
425    const U32 tableMask = tableSize-1;
426    const U32 step = FSE_tableStep(tableSize);
427    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
428    U32 position = 0;
429    U32 highThreshold = tableSize-1;
430    const S16 largeLimit= (S16)(1 << (tableLog-1));
431    U32 noLarge = 1;
432    U32 s;
433
434    /* Sanity Checks */
435    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
436    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
437
438    /* Init, lay down lowprob symbols */
439    DTableH[0].tableLog = (U16)tableLog;
440    for (s=0; s<=maxSymbolValue; s++)
441    {
442        if (normalizedCounter[s]==-1)
443        {
444            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
445            symbolNext[s] = 1;
446        }
447        else
448        {
449            if (normalizedCounter[s] >= largeLimit) noLarge=0;
450            symbolNext[s] = normalizedCounter[s];
451        }
452    }
453
454    /* Spread symbols */
455    for (s=0; s<=maxSymbolValue; s++)
456    {
457        int i;
458        for (i=0; i<normalizedCounter[s]; i++)
459        {
460            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
461            position = (position + step) & tableMask;
462            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
463        }
464    }
465
466    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
467
468    /* Build Decoding table */
469    {
470        U32 i;
471        for (i=0; i<tableSize; i++)
472        {
473            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
474            U16 nextState = symbolNext[symbol]++;
475            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
476            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
477        }
478    }
479
480    DTableH->fastMode = (U16)noLarge;
481    return 0;
482}
483
484
485/******************************************
486*  FSE byte symbol
487******************************************/
488#ifndef FSE_COMMONDEFS_ONLY
489
490static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
491
492static short FSE_abs(short a)
493{
494    return a<0? -a : a;
495}
496
497
498/****************************************************************
499*  Header bitstream management
500****************************************************************/
501static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
502                 const void* headerBuffer, size_t hbSize)
503{
504    const BYTE* const istart = (const BYTE*) headerBuffer;
505    const BYTE* const iend = istart + hbSize;
506    const BYTE* ip = istart;
507    int nbBits;
508    int remaining;
509    int threshold;
510    U32 bitStream;
511    int bitCount;
512    unsigned charnum = 0;
513    int previous0 = 0;
514
515    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
516    bitStream = FSE_readLE32(ip);
517    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
518    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
519    bitStream >>= 4;
520    bitCount = 4;
521    *tableLogPtr = nbBits;
522    remaining = (1<<nbBits)+1;
523    threshold = 1<<nbBits;
524    nbBits++;
525
526    while ((remaining>1) && (charnum<=*maxSVPtr))
527    {
528        if (previous0)
529        {
530            unsigned n0 = charnum;
531            while ((bitStream & 0xFFFF) == 0xFFFF)
532            {
533                n0+=24;
534                if (ip < iend-5)
535                {
536                    ip+=2;
537                    bitStream = FSE_readLE32(ip) >> bitCount;
538                }
539                else
540                {
541                    bitStream >>= 16;
542                    bitCount+=16;
543                }
544            }
545            while ((bitStream & 3) == 3)
546            {
547                n0+=3;
548                bitStream>>=2;
549                bitCount+=2;
550            }
551            n0 += bitStream & 3;
552            bitCount += 2;
553            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
554            while (charnum < n0) normalizedCounter[charnum++] = 0;
555            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
556            {
557                ip += bitCount>>3;
558                bitCount &= 7;
559                bitStream = FSE_readLE32(ip) >> bitCount;
560            }
561            else
562                bitStream >>= 2;
563        }
564        {
565            const short max = (short)((2*threshold-1)-remaining);
566            short count;
567
568            if ((bitStream & (threshold-1)) < (U32)max)
569            {
570                count = (short)(bitStream & (threshold-1));
571                bitCount   += nbBits-1;
572            }
573            else
574            {
575                count = (short)(bitStream & (2*threshold-1));
576                if (count >= threshold) count -= max;
577                bitCount   += nbBits;
578            }
579
580            count--;   /* extra accuracy */
581            remaining -= FSE_abs(count);
582            normalizedCounter[charnum++] = count;
583            previous0 = !count;
584            while (remaining < threshold)
585            {
586                nbBits--;
587                threshold >>= 1;
588            }
589
590            {
591                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
592                {
593                    ip += bitCount>>3;
594                    bitCount &= 7;
595                }
596                else
597                {
598                    bitCount -= (int)(8 * (iend - 4 - ip));
599                    ip = iend - 4;
600                }
601                bitStream = FSE_readLE32(ip) >> (bitCount & 31);
602            }
603        }
604    }
605    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
606    *maxSVPtr = charnum-1;
607
608    ip += (bitCount+7)>>3;
609    if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
610    return ip-istart;
611}
612
613
614/*********************************************************
615*  Decompression (Byte symbols)
616*********************************************************/
617static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
618{
619    void* ptr = dt;
620    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
621    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
622
623    DTableH->tableLog = 0;
624    DTableH->fastMode = 0;
625
626    cell->newState = 0;
627    cell->symbol = symbolValue;
628    cell->nbBits = 0;
629
630    return 0;
631}
632
633
634static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
635{
636    void* ptr = dt;
637    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
638    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
639    const unsigned tableSize = 1 << nbBits;
640    const unsigned tableMask = tableSize - 1;
641    const unsigned maxSymbolValue = tableMask;
642    unsigned s;
643
644    /* Sanity checks */
645    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
646
647    /* Build Decoding Table */
648    DTableH->tableLog = (U16)nbBits;
649    DTableH->fastMode = 1;
650    for (s=0; s<=maxSymbolValue; s++)
651    {
652        dinfo[s].newState = 0;
653        dinfo[s].symbol = (BYTE)s;
654        dinfo[s].nbBits = (BYTE)nbBits;
655    }
656
657    return 0;
658}
659
660
661/* FSE_initDStream
662 * Initialize a FSE_DStream_t.
663 * srcBuffer must point at the beginning of an FSE block.
664 * The function result is the size of the FSE_block (== srcSize).
665 * If srcSize is too small, the function will return an errorCode;
666 */
667static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
668{
669    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
670
671    if (srcSize >=  sizeof(size_t))
672    {
673        U32 contain32;
674        bitD->start = (const char*)srcBuffer;
675        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
676        bitD->bitContainer = FSE_readLEST(bitD->ptr);
677        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
678        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
679        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
680    }
681    else
682    {
683        U32 contain32;
684        bitD->start = (const char*)srcBuffer;
685        bitD->ptr   = bitD->start;
686        bitD->bitContainer = *(const BYTE*)(bitD->start);
687        switch(srcSize)
688        {
689            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
690            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
691            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
692            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
693            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
694            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
695            default:;
696        }
697        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
698        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
699        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
700        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
701    }
702
703    return srcSize;
704}
705
706
707/*!FSE_lookBits
708 * Provides next n bits from the bitContainer.
709 * bitContainer is not modified (bits are still present for next read/look)
710 * On 32-bits, maxNbBits==25
711 * On 64-bits, maxNbBits==57
712 * return : value extracted.
713 */
714static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
715{
716    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
717    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
718}
719
720static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
721{
722    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
723    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
724}
725
726static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
727{
728    bitD->bitsConsumed += nbBits;
729}
730
731
732/*!FSE_readBits
733 * Read next n bits from the bitContainer.
734 * On 32-bits, don't read more than maxNbBits==25
735 * On 64-bits, don't read more than maxNbBits==57
736 * Use the fast variant *only* if n >= 1.
737 * return : value extracted.
738 */
739static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
740{
741    size_t value = FSE_lookBits(bitD, nbBits);
742    FSE_skipBits(bitD, nbBits);
743    return value;
744}
745
746static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
747{
748    size_t value = FSE_lookBitsFast(bitD, nbBits);
749    FSE_skipBits(bitD, nbBits);
750    return value;
751}
752
753static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
754{
755    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
756        return FSE_DStream_tooFar;
757
758    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
759    {
760        bitD->ptr -= bitD->bitsConsumed >> 3;
761        bitD->bitsConsumed &= 7;
762        bitD->bitContainer = FSE_readLEST(bitD->ptr);
763        return FSE_DStream_unfinished;
764    }
765    if (bitD->ptr == bitD->start)
766    {
767        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
768        return FSE_DStream_completed;
769    }
770    {
771        U32 nbBytes = bitD->bitsConsumed >> 3;
772        U32 result = FSE_DStream_unfinished;
773        if (bitD->ptr - nbBytes < bitD->start)
774        {
775            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
776            result = FSE_DStream_endOfBuffer;
777        }
778        bitD->ptr -= nbBytes;
779        bitD->bitsConsumed -= nbBytes*8;
780        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
781        return result;
782    }
783}
784
785
786static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
787{
788    const void* ptr = dt;
789    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
790    DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
791    FSE_reloadDStream(bitD);
792    DStatePtr->table = dt + 1;
793}
794
795static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
796{
797    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
798    const U32  nbBits = DInfo.nbBits;
799    BYTE symbol = DInfo.symbol;
800    size_t lowBits = FSE_readBits(bitD, nbBits);
801
802    DStatePtr->state = DInfo.newState + lowBits;
803    return symbol;
804}
805
806static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
807{
808    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
809    const U32 nbBits = DInfo.nbBits;
810    BYTE symbol = DInfo.symbol;
811    size_t lowBits = FSE_readBitsFast(bitD, nbBits);
812
813    DStatePtr->state = DInfo.newState + lowBits;
814    return symbol;
815}
816
817/* FSE_endOfDStream
818   Tells if bitD has reached end of bitStream or not */
819
820static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
821{
822    return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
823}
824
825static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
826{
827    return DStatePtr->state == 0;
828}
829
830
831FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
832          void* dst, size_t maxDstSize,
833    const void* cSrc, size_t cSrcSize,
834    const FSE_DTable* dt, const unsigned fast)
835{
836    BYTE* const ostart = (BYTE*) dst;
837    BYTE* op = ostart;
838    BYTE* const omax = op + maxDstSize;
839    BYTE* const olimit = omax-3;
840
841    FSE_DStream_t bitD;
842    FSE_DState_t state1;
843    FSE_DState_t state2;
844    size_t errorCode;
845
846    /* Init */
847    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
848    if (FSE_isError(errorCode)) return errorCode;
849
850    FSE_initDState(&state1, &bitD, dt);
851    FSE_initDState(&state2, &bitD, dt);
852
853#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
854
855    /* 4 symbols per loop */
856    for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
857    {
858        op[0] = FSE_GETSYMBOL(&state1);
859
860        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
861            FSE_reloadDStream(&bitD);
862
863        op[1] = FSE_GETSYMBOL(&state2);
864
865        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
866            { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
867
868        op[2] = FSE_GETSYMBOL(&state1);
869
870        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
871            FSE_reloadDStream(&bitD);
872
873        op[3] = FSE_GETSYMBOL(&state2);
874    }
875
876    /* tail */
877    /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
878    while (1)
879    {
880        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
881            break;
882
883        *op++ = FSE_GETSYMBOL(&state1);
884
885        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
886            break;
887
888        *op++ = FSE_GETSYMBOL(&state2);
889    }
890
891    /* end ? */
892    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
893        return op-ostart;
894
895    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
896
897    return (size_t)-FSE_ERROR_corruptionDetected;
898}
899
900
901static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
902                            const void* cSrc, size_t cSrcSize,
903                            const FSE_DTable* dt)
904{
905    FSE_DTableHeader DTableH;
906    memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
907
908    /* select fast mode (static) */
909    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
910    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
911}
912
913
914static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
915{
916    const BYTE* const istart = (const BYTE*)cSrc;
917    const BYTE* ip = istart;
918    short counting[FSE_MAX_SYMBOL_VALUE+1];
919    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
920    unsigned tableLog;
921    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
922    size_t errorCode;
923
924    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
925
926    /* normal FSE decoding mode */
927    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
928    if (FSE_isError(errorCode)) return errorCode;
929    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
930    ip += errorCode;
931    cSrcSize -= errorCode;
932
933    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
934    if (FSE_isError(errorCode)) return errorCode;
935
936    /* always return, even if it is an error code */
937    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
938}
939
940
941
942/* *******************************************************
943*  Huff0 : Huffman block compression
944*********************************************************/
945#define HUF_MAX_SYMBOL_VALUE 255
946#define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
947#define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
948#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
949#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
950#  error "HUF_MAX_TABLELOG is too large !"
951#endif
952
953typedef struct HUF_CElt_s {
954  U16  val;
955  BYTE nbBits;
956} HUF_CElt ;
957
958typedef struct nodeElt_s {
959    U32 count;
960    U16 parent;
961    BYTE byte;
962    BYTE nbBits;
963} nodeElt;
964
965
966/* *******************************************************
967*  Huff0 : Huffman block decompression
968*********************************************************/
969typedef struct {
970    BYTE byte;
971    BYTE nbBits;
972} HUF_DElt;
973
974static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
975{
976    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
977    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
978    U32 weightTotal;
979    U32 maxBits;
980    const BYTE* ip = (const BYTE*) src;
981    size_t iSize = ip[0];
982    size_t oSize;
983    U32 n;
984    U32 nextRankStart;
985    void* ptr = DTable+1;
986    HUF_DElt* const dt = (HUF_DElt*)ptr;
987
988    FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
989    //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
990    if (iSize >= 128)  /* special header */
991    {
992        if (iSize >= (242))   /* RLE */
993        {
994            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
995            oSize = l[iSize-242];
996            memset(huffWeight, 1, sizeof(huffWeight));
997            iSize = 0;
998        }
999        else   /* Incompressible */
1000        {
1001            oSize = iSize - 127;
1002            iSize = ((oSize+1)/2);
1003            if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1004            ip += 1;
1005            for (n=0; n<oSize; n+=2)
1006            {
1007                huffWeight[n]   = ip[n/2] >> 4;
1008                huffWeight[n+1] = ip[n/2] & 15;
1009            }
1010        }
1011    }
1012    else  /* header compressed with FSE (normal case) */
1013    {
1014        if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1015        oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
1016        if (FSE_isError(oSize)) return oSize;
1017    }
1018
1019    /* collect weight stats */
1020    memset(rankVal, 0, sizeof(rankVal));
1021    weightTotal = 0;
1022    for (n=0; n<oSize; n++)
1023    {
1024        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
1025        rankVal[huffWeight[n]]++;
1026        weightTotal += (1 << huffWeight[n]) >> 1;
1027    }
1028
1029    /* get last non-null symbol weight (implied, total must be 2^n) */
1030    maxBits = FSE_highbit32(weightTotal) + 1;
1031    if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
1032    DTable[0] = (U16)maxBits;
1033    {
1034        U32 total = 1 << maxBits;
1035        U32 rest = total - weightTotal;
1036        U32 verif = 1 << FSE_highbit32(rest);
1037        U32 lastWeight = FSE_highbit32(rest) + 1;
1038        if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
1039        huffWeight[oSize] = (BYTE)lastWeight;
1040        rankVal[lastWeight]++;
1041    }
1042
1043    /* check tree construction validity */
1044    if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
1045
1046    /* Prepare ranks */
1047    nextRankStart = 0;
1048    for (n=1; n<=maxBits; n++)
1049    {
1050        U32 current = nextRankStart;
1051        nextRankStart += (rankVal[n] << (n-1));
1052        rankVal[n] = current;
1053    }
1054
1055    /* fill DTable */
1056    for (n=0; n<=oSize; n++)
1057    {
1058        const U32 w = huffWeight[n];
1059        const U32 length = (1 << w) >> 1;
1060        U32 i;
1061        HUF_DElt D;
1062        D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1063        for (i = rankVal[w]; i < rankVal[w] + length; i++)
1064            dt[i] = D;
1065        rankVal[w] += length;
1066    }
1067
1068    return iSize+1;
1069}
1070
1071
1072static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1073{
1074        const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1075        const BYTE c = dt[val].byte;
1076        FSE_skipBits(Dstream, dt[val].nbBits);
1077        return c;
1078}
1079
1080static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
1081          void* dst, size_t maxDstSize,
1082    const void* cSrc, size_t cSrcSize,
1083    const U16* DTable)
1084{
1085    BYTE* const ostart = (BYTE*) dst;
1086    BYTE* op = ostart;
1087    BYTE* const omax = op + maxDstSize;
1088    BYTE* const olimit = omax-15;
1089
1090    const void* ptr = DTable;
1091    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1092    const U32 dtLog = DTable[0];
1093    size_t errorCode;
1094    U32 reloadStatus;
1095
1096    /* Init */
1097
1098    const U16* jumpTable = (const U16*)cSrc;
1099    const size_t length1 = FSE_readLE16(jumpTable);
1100    const size_t length2 = FSE_readLE16(jumpTable+1);
1101    const size_t length3 = FSE_readLE16(jumpTable+2);
1102    const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
1103    const char* const start1 = (const char*)(cSrc) + 6;
1104    const char* const start2 = start1 + length1;
1105    const char* const start3 = start2 + length2;
1106    const char* const start4 = start3 + length3;
1107    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1108
1109    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1110
1111    errorCode = FSE_initDStream(&bitD1, start1, length1);
1112    if (FSE_isError(errorCode)) return errorCode;
1113    errorCode = FSE_initDStream(&bitD2, start2, length2);
1114    if (FSE_isError(errorCode)) return errorCode;
1115    errorCode = FSE_initDStream(&bitD3, start3, length3);
1116    if (FSE_isError(errorCode)) return errorCode;
1117    errorCode = FSE_initDStream(&bitD4, start4, length4);
1118    if (FSE_isError(errorCode)) return errorCode;
1119
1120    reloadStatus=FSE_reloadDStream(&bitD2);
1121
1122    /* 16 symbols per loop */
1123    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
1124        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1125    {
1126#define HUF_DECODE_SYMBOL_0(n, Dstream) \
1127        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1128
1129#define HUF_DECODE_SYMBOL_1(n, Dstream) \
1130        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1131        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1132
1133#define HUF_DECODE_SYMBOL_2(n, Dstream) \
1134        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1135        if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1136
1137        HUF_DECODE_SYMBOL_1( 0, bitD1);
1138        HUF_DECODE_SYMBOL_1( 1, bitD2);
1139        HUF_DECODE_SYMBOL_1( 2, bitD3);
1140        HUF_DECODE_SYMBOL_1( 3, bitD4);
1141        HUF_DECODE_SYMBOL_2( 4, bitD1);
1142        HUF_DECODE_SYMBOL_2( 5, bitD2);
1143        HUF_DECODE_SYMBOL_2( 6, bitD3);
1144        HUF_DECODE_SYMBOL_2( 7, bitD4);
1145        HUF_DECODE_SYMBOL_1( 8, bitD1);
1146        HUF_DECODE_SYMBOL_1( 9, bitD2);
1147        HUF_DECODE_SYMBOL_1(10, bitD3);
1148        HUF_DECODE_SYMBOL_1(11, bitD4);
1149        HUF_DECODE_SYMBOL_0(12, bitD1);
1150        HUF_DECODE_SYMBOL_0(13, bitD2);
1151        HUF_DECODE_SYMBOL_0(14, bitD3);
1152        HUF_DECODE_SYMBOL_0(15, bitD4);
1153    }
1154
1155    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
1156        return (size_t)-FSE_ERROR_corruptionDetected;
1157
1158    /* tail */
1159    {
1160        // bitTail = bitD1;   // *much* slower : -20% !??!
1161        FSE_DStream_t bitTail;
1162        bitTail.ptr = bitD1.ptr;
1163        bitTail.bitsConsumed = bitD1.bitsConsumed;
1164        bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
1165        bitTail.start = start1;
1166        for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1167        {
1168            HUF_DECODE_SYMBOL_0(0, bitTail);
1169        }
1170
1171        if (FSE_endOfDStream(&bitTail))
1172            return op-ostart;
1173    }
1174
1175    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
1176
1177    return (size_t)-FSE_ERROR_corruptionDetected;
1178}
1179
1180
1181static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1182{
1183    HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1184    const BYTE* ip = (const BYTE*) cSrc;
1185    size_t errorCode;
1186
1187    errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1188    if (FSE_isError(errorCode)) return errorCode;
1189    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1190    ip += errorCode;
1191    cSrcSize -= errorCode;
1192
1193    return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1194}
1195
1196
1197#endif   /* FSE_COMMONDEFS_ONLY */
1198
1199/*
1200    zstd - standard compression library
1201    Header File for static linking only
1202    Copyright (C) 2014-2015, Yann Collet.
1203
1204    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1205
1206    Redistribution and use in source and binary forms, with or without
1207    modification, are permitted provided that the following conditions are
1208    met:
1209    * Redistributions of source code must retain the above copyright
1210    notice, this list of conditions and the following disclaimer.
1211    * Redistributions in binary form must reproduce the above
1212    copyright notice, this list of conditions and the following disclaimer
1213    in the documentation and/or other materials provided with the
1214    distribution.
1215    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1216    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1217    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1218    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1219    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1220    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1221    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1222    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1223    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1224    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1225    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1226
1227    You can contact the author at :
1228    - zstd source repository : https://github.com/Cyan4973/zstd
1229    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1230*/
1231
1232/* The objects defined into this file should be considered experimental.
1233 * They are not labelled stable, as their prototype may change in the future.
1234 * You can use them for tests, provide feedback, or if you can endure risk of future changes.
1235 */
1236
1237/**************************************
1238*  Error management
1239**************************************/
1240#define ZSTD_LIST_ERRORS(ITEM) \
1241        ITEM(ZSTD_OK_NoError) ITEM(ZSTD_ERROR_GENERIC) \
1242        ITEM(ZSTD_ERROR_MagicNumber) \
1243        ITEM(ZSTD_ERROR_SrcSize) ITEM(ZSTD_ERROR_maxDstSize_tooSmall) \
1244        ITEM(ZSTD_ERROR_corruption) \
1245        ITEM(ZSTD_ERROR_maxCode)
1246
1247#define ZSTD_GENERATE_ENUM(ENUM) ENUM,
1248typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes;   /* exposed list of errors; static linking only */
1249
1250/*
1251    zstd - standard compression library
1252    Copyright (C) 2014-2015, Yann Collet.
1253
1254    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1255
1256    Redistribution and use in source and binary forms, with or without
1257    modification, are permitted provided that the following conditions are
1258    met:
1259    * Redistributions of source code must retain the above copyright
1260    notice, this list of conditions and the following disclaimer.
1261    * Redistributions in binary form must reproduce the above
1262    copyright notice, this list of conditions and the following disclaimer
1263    in the documentation and/or other materials provided with the
1264    distribution.
1265    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1266    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1267    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1268    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1269    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1270    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1271    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1272    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1273    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1274    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1275    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1276
1277    You can contact the author at :
1278    - zstd source repository : https://github.com/Cyan4973/zstd
1279    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1280*/
1281
1282/****************************************************************
1283*  Tuning parameters
1284*****************************************************************/
1285/* MEMORY_USAGE :
1286*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1287*  Increasing memory usage improves compression ratio
1288*  Reduced memory usage can improve speed, due to cache effect */
1289#define ZSTD_MEMORY_USAGE 17
1290
1291
1292/**************************************
1293   CPU Feature Detection
1294**************************************/
1295/*
1296 * Automated efficient unaligned memory access detection
1297 * Based on known hardware architectures
1298 * This list will be updated thanks to feedbacks
1299 */
1300#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1301    || defined(__ARM_FEATURE_UNALIGNED) \
1302    || defined(__i386__) || defined(__x86_64__) \
1303    || defined(_M_IX86) || defined(_M_X64) \
1304    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1305    || (defined(_M_ARM) && (_M_ARM >= 7))
1306#  define ZSTD_UNALIGNED_ACCESS 1
1307#else
1308#  define ZSTD_UNALIGNED_ACCESS 0
1309#endif
1310
1311
1312/********************************************************
1313*  Includes
1314*********************************************************/
1315#include <stdlib.h>      /* calloc */
1316#include <string.h>      /* memcpy, memmove */
1317#include <stdio.h>       /* debug : printf */
1318
1319
1320/********************************************************
1321*  Compiler specifics
1322*********************************************************/
1323#ifdef __AVX2__
1324#  include <immintrin.h>   /* AVX2 intrinsics */
1325#endif
1326
1327#ifdef _MSC_VER    /* Visual Studio */
1328#  define FORCE_INLINE static __forceinline
1329#  include <intrin.h>                    /* For Visual 2005 */
1330#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
1331#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
1332#else
1333#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
1334#  ifdef __GNUC__
1335#    define FORCE_INLINE static inline __attribute__((always_inline))
1336#  else
1337#    define FORCE_INLINE static inline
1338#  endif
1339#endif
1340
1341
1342#ifndef MEM_ACCESS_MODULE
1343#define MEM_ACCESS_MODULE
1344/********************************************************
1345*  Basic Types
1346*********************************************************/
1347#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
1348# include <stdint.h>
1349typedef  uint8_t BYTE;
1350typedef uint16_t U16;
1351typedef  int16_t S16;
1352typedef uint32_t U32;
1353typedef  int32_t S32;
1354typedef uint64_t U64;
1355#else
1356typedef unsigned char       BYTE;
1357typedef unsigned short      U16;
1358typedef   signed short      S16;
1359typedef unsigned int        U32;
1360typedef   signed int        S32;
1361typedef unsigned long long  U64;
1362#endif
1363
1364#endif   /* MEM_ACCESS_MODULE */
1365
1366
1367/********************************************************
1368*  Constants
1369*********************************************************/
1370static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
1371
1372#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1373#define HASH_TABLESIZE (1 << HASH_LOG)
1374#define HASH_MASK (HASH_TABLESIZE - 1)
1375
1376#define KNUTH 2654435761
1377
1378#define BIT7 128
1379#define BIT6  64
1380#define BIT5  32
1381#define BIT4  16
1382
1383#define KB *(1 <<10)
1384#define MB *(1 <<20)
1385#define GB *(1U<<30)
1386
1387#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
1388
1389#define WORKPLACESIZE (BLOCKSIZE*3)
1390#define MINMATCH 4
1391#define MLbits   7
1392#define LLbits   6
1393#define Offbits  5
1394#define MaxML  ((1<<MLbits )-1)
1395#define MaxLL  ((1<<LLbits )-1)
1396#define MaxOff ((1<<Offbits)-1)
1397#define LitFSELog  11
1398#define MLFSELog   10
1399#define LLFSELog   10
1400#define OffFSELog   9
1401#define MAX(a,b) ((a)<(b)?(b):(a))
1402#define MaxSeq MAX(MaxLL, MaxML)
1403
1404#define LITERAL_NOENTROPY 63
1405#define COMMAND_NOENTROPY 7   /* to remove */
1406
1407static const size_t ZSTD_blockHeaderSize = 3;
1408static const size_t ZSTD_frameHeaderSize = 4;
1409
1410
1411/********************************************************
1412*  Memory operations
1413*********************************************************/
1414static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1415
1416static unsigned ZSTD_isLittleEndian(void)
1417{
1418    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
1419    return one.c[0];
1420}
1421
1422static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1423
1424static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
1425
1426static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1427
1428static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1429
1430#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
1431
1432static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
1433{
1434    const BYTE* ip = (const BYTE*)src;
1435    BYTE* op = (BYTE*)dst;
1436    BYTE* const oend = op + length;
1437    while (op < oend) COPY8(op, ip);
1438}
1439
1440static U16 ZSTD_readLE16(const void* memPtr)
1441{
1442    if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1443    else
1444    {
1445        const BYTE* p = (const BYTE*)memPtr;
1446        return (U16)((U16)p[0] + ((U16)p[1]<<8));
1447    }
1448}
1449
1450
1451static U32 ZSTD_readLE32(const void* memPtr)
1452{
1453    if (ZSTD_isLittleEndian())
1454        return ZSTD_read32(memPtr);
1455    else
1456    {
1457        const BYTE* p = (const BYTE*)memPtr;
1458        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
1459    }
1460}
1461
1462static U32 ZSTD_readBE32(const void* memPtr)
1463{
1464    const BYTE* p = (const BYTE*)memPtr;
1465    return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1466}
1467
1468
1469/**************************************
1470*  Local structures
1471***************************************/
1472typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1473
1474typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1475
1476typedef struct
1477{
1478    blockType_t blockType;
1479    U32 origSize;
1480} blockProperties_t;
1481
1482typedef struct {
1483    void* buffer;
1484    U32*  offsetStart;
1485    U32*  offset;
1486    BYTE* offCodeStart;
1487    BYTE* offCode;
1488    BYTE* litStart;
1489    BYTE* lit;
1490    BYTE* litLengthStart;
1491    BYTE* litLength;
1492    BYTE* matchLengthStart;
1493    BYTE* matchLength;
1494    BYTE* dumpsStart;
1495    BYTE* dumps;
1496} seqStore_t;
1497
1498
1499typedef struct ZSTD_Cctx_s
1500{
1501    const BYTE* base;
1502    U32 current;
1503    U32 nextUpdate;
1504    seqStore_t seqStore;
1505#ifdef __AVX2__
1506    __m256i hashTable[HASH_TABLESIZE>>3];
1507#else
1508    U32 hashTable[HASH_TABLESIZE];
1509#endif
1510        BYTE buffer[WORKPLACESIZE];
1511} cctxi_t;
1512
1513
1514
1515
1516/**************************************
1517*  Error Management
1518**************************************/
1519/* tells if a return value is an error code */
1520static unsigned ZSTD_isError(size_t code) { return (code > (size_t)(-ZSTD_ERROR_maxCode)); }
1521
1522/* published entry point */
1523unsigned ZSTDv01_isError(size_t code) { return ZSTD_isError(code); }
1524
1525
1526/**************************************
1527*  Tool functions
1528**************************************/
1529#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
1530#define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
1531#define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
1532#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1533
1534/**************************************************************
1535*   Decompression code
1536**************************************************************/
1537
1538static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1539{
1540    const BYTE* const in = (const BYTE* const)src;
1541    BYTE headerFlags;
1542    U32 cSize;
1543
1544    if (srcSize < 3) return (size_t)-ZSTD_ERROR_SrcSize;
1545
1546    headerFlags = *in;
1547    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1548
1549    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1550    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1551
1552    if (bpPtr->blockType == bt_end) return 0;
1553    if (bpPtr->blockType == bt_rle) return 1;
1554    return cSize;
1555}
1556
1557
1558static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1559{
1560    if (srcSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1561    memcpy(dst, src, srcSize);
1562    return srcSize;
1563}
1564
1565
1566static size_t ZSTD_decompressLiterals(void* ctx,
1567                                      void* dst, size_t maxDstSize,
1568                                const void* src, size_t srcSize)
1569{
1570    BYTE* op = (BYTE*)dst;
1571    BYTE* const oend = op + maxDstSize;
1572    const BYTE* ip = (const BYTE*)src;
1573    size_t errorCode;
1574    size_t litSize;
1575
1576    /* check : minimum 2, for litSize, +1, for content */
1577    if (srcSize <= 3) return (size_t)-ZSTD_ERROR_corruption;
1578
1579    litSize = ip[1] + (ip[0]<<8);
1580    litSize += ((ip[-3] >> 3) & 7) << 16;   // mmmmh....
1581    op = oend - litSize;
1582
1583    (void)ctx;
1584    if (litSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1585    errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1586    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1587    return litSize;
1588}
1589
1590
1591static size_t ZSTD_decodeLiteralsBlock(void* ctx,
1592                                void* dst, size_t maxDstSize,
1593                          const BYTE** litStart, size_t* litSize,
1594                          const void* src, size_t srcSize)
1595{
1596    const BYTE* const istart = (const BYTE* const)src;
1597    const BYTE* ip = istart;
1598    BYTE* const ostart = (BYTE* const)dst;
1599    BYTE* const oend = ostart + maxDstSize;
1600    blockProperties_t litbp;
1601
1602    size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
1603    if (ZSTD_isError(litcSize)) return litcSize;
1604    if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
1605    ip += ZSTD_blockHeaderSize;
1606
1607    switch(litbp.blockType)
1608    {
1609    case bt_raw:
1610        *litStart = ip;
1611        ip += litcSize;
1612        *litSize = litcSize;
1613        break;
1614    case bt_rle:
1615        {
1616            size_t rleSize = litbp.origSize;
1617            if (rleSize>maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1618            memset(oend - rleSize, *ip, rleSize);
1619            *litStart = oend - rleSize;
1620            *litSize = rleSize;
1621            ip++;
1622            break;
1623        }
1624    case bt_compressed:
1625        {
1626            size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1627            if (ZSTD_isError(decodedLitSize)) return decodedLitSize;
1628            *litStart = oend - decodedLitSize;
1629            *litSize = decodedLitSize;
1630            ip += litcSize;
1631            break;
1632        }
1633    case bt_end:
1634    default:
1635        return (size_t)-ZSTD_ERROR_GENERIC;
1636    }
1637
1638    return ip-istart;
1639}
1640
1641
1642static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1643                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1644                         const void* src, size_t srcSize)
1645{
1646    const BYTE* const istart = (const BYTE* const)src;
1647    const BYTE* ip = istart;
1648    const BYTE* const iend = istart + srcSize;
1649    U32 LLtype, Offtype, MLtype;
1650    U32 LLlog, Offlog, MLlog;
1651    size_t dumpsLength;
1652
1653    /* check */
1654    if (srcSize < 5) return (size_t)-ZSTD_ERROR_SrcSize;
1655
1656    /* SeqHead */
1657    *nbSeq = ZSTD_readLE16(ip); ip+=2;
1658    LLtype  = *ip >> 6;
1659    Offtype = (*ip >> 4) & 3;
1660    MLtype  = (*ip >> 2) & 3;
1661    if (*ip & 2)
1662    {
1663        dumpsLength  = ip[2];
1664        dumpsLength += ip[1] << 8;
1665        ip += 3;
1666    }
1667    else
1668    {
1669        dumpsLength  = ip[1];
1670        dumpsLength += (ip[0] & 1) << 8;
1671        ip += 2;
1672    }
1673    *dumpsPtr = ip;
1674    ip += dumpsLength;
1675    *dumpsLengthPtr = dumpsLength;
1676
1677    /* check */
1678    if (ip > iend-3) return (size_t)-ZSTD_ERROR_SrcSize; /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1679
1680    /* sequences */
1681    {
1682        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
1683        size_t headerSize;
1684
1685        /* Build DTables */
1686        switch(LLtype)
1687        {
1688        U32 max;
1689        case bt_rle :
1690            LLlog = 0;
1691            FSE_buildDTable_rle(DTableLL, *ip++); break;
1692        case bt_raw :
1693            LLlog = LLbits;
1694            FSE_buildDTable_raw(DTableLL, LLbits); break;
1695        default :
1696            max = MaxLL;
1697            headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1698            if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1699            if (LLlog > LLFSELog) return (size_t)-ZSTD_ERROR_corruption;
1700            ip += headerSize;
1701            FSE_buildDTable(DTableLL, norm, max, LLlog);
1702        }
1703
1704        switch(Offtype)
1705        {
1706        U32 max;
1707        case bt_rle :
1708            Offlog = 0;
1709            if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
1710            FSE_buildDTable_rle(DTableOffb, *ip++); break;
1711        case bt_raw :
1712            Offlog = Offbits;
1713            FSE_buildDTable_raw(DTableOffb, Offbits); break;
1714        default :
1715            max = MaxOff;
1716            headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1717            if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1718            if (Offlog > OffFSELog) return (size_t)-ZSTD_ERROR_corruption;
1719            ip += headerSize;
1720            FSE_buildDTable(DTableOffb, norm, max, Offlog);
1721        }
1722
1723        switch(MLtype)
1724        {
1725        U32 max;
1726        case bt_rle :
1727            MLlog = 0;
1728            if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
1729            FSE_buildDTable_rle(DTableML, *ip++); break;
1730        case bt_raw :
1731            MLlog = MLbits;
1732            FSE_buildDTable_raw(DTableML, MLbits); break;
1733        default :
1734            max = MaxML;
1735            headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1736            if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1737            if (MLlog > MLFSELog) return (size_t)-ZSTD_ERROR_corruption;
1738            ip += headerSize;
1739            FSE_buildDTable(DTableML, norm, max, MLlog);
1740        }
1741    }
1742
1743    return ip-istart;
1744}
1745
1746
1747typedef struct {
1748    size_t litLength;
1749    size_t offset;
1750    size_t matchLength;
1751} seq_t;
1752
1753typedef struct {
1754    FSE_DStream_t DStream;
1755    FSE_DState_t stateLL;
1756    FSE_DState_t stateOffb;
1757    FSE_DState_t stateML;
1758    size_t prevOffset;
1759    const BYTE* dumps;
1760    const BYTE* dumpsEnd;
1761} seqState_t;
1762
1763
1764static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1765{
1766    size_t litLength;
1767    size_t prevOffset;
1768    size_t offset;
1769    size_t matchLength;
1770    const BYTE* dumps = seqState->dumps;
1771    const BYTE* const de = seqState->dumpsEnd;
1772
1773    /* Literal length */
1774    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1775    prevOffset = litLength ? seq->offset : seqState->prevOffset;
1776    seqState->prevOffset = seq->offset;
1777    if (litLength == MaxLL)
1778    {
1779        U32 add = dumps<de ? *dumps++ : 0;
1780        if (add < 255) litLength += add;
1781        else
1782        {
1783            if (dumps<=(de-3))
1784            {
1785                litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
1786                dumps += 3;
1787            }
1788        }
1789    }
1790
1791    /* Offset */
1792    {
1793        U32 offsetCode, nbBits;
1794        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1795        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1796        nbBits = offsetCode - 1;
1797        if (offsetCode==0) nbBits = 0;   /* cmove */
1798        offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1799        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1800        if (offsetCode==0) offset = prevOffset;
1801    }
1802
1803    /* MatchLength */
1804    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1805    if (matchLength == MaxML)
1806    {
1807        U32 add = dumps<de ? *dumps++ : 0;
1808        if (add < 255) matchLength += add;
1809        else
1810        {
1811            if (dumps<=(de-3))
1812            {
1813                matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
1814                dumps += 3;
1815            }
1816        }
1817    }
1818    matchLength += MINMATCH;
1819
1820    /* save result */
1821    seq->litLength = litLength;
1822    seq->offset = offset;
1823    seq->matchLength = matchLength;
1824    seqState->dumps = dumps;
1825}
1826
1827
1828static size_t ZSTD_execSequence(BYTE* op,
1829                                seq_t sequence,
1830                                const BYTE** litPtr, const BYTE* const litLimit,
1831                                BYTE* const base, BYTE* const oend)
1832{
1833    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
1834    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
1835    const BYTE* const ostart = op;
1836    const size_t litLength = sequence.litLength;
1837    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
1838    const BYTE* const litEnd = *litPtr + litLength;
1839
1840    /* check */
1841    if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;   /* overwrite beyond dst buffer */
1842    if (litEnd > litLimit) return (size_t)-ZSTD_ERROR_corruption;
1843    if (sequence.matchLength > (size_t)(*litPtr-op))  return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;    /* overwrite literal segment */
1844
1845    /* copy Literals */
1846    if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1847        memmove(op, *litPtr, litLength);   /* overwrite risk */
1848    else
1849        ZSTD_wildcopy(op, *litPtr, litLength);
1850    op += litLength;
1851    *litPtr = litEnd;   /* update for next sequence */
1852
1853    /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1854    if (oend-op < 8) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1855
1856    /* copy Match */
1857    {
1858        const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1859        const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
1860        size_t qutt = 12;
1861        U64 saved[2];
1862
1863        /* check */
1864        if (match < base) return (size_t)-ZSTD_ERROR_corruption;
1865        if (sequence.offset > (size_t)base) return (size_t)-ZSTD_ERROR_corruption;
1866
1867        /* save beginning of literal sequence, in case of write overlap */
1868        if (overlapRisk)
1869        {
1870            if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1871            memcpy(saved, endMatch, qutt);
1872        }
1873
1874        if (sequence.offset < 8)
1875        {
1876            const int dec64 = dec64table[sequence.offset];
1877            op[0] = match[0];
1878            op[1] = match[1];
1879            op[2] = match[2];
1880            op[3] = match[3];
1881            match += dec32table[sequence.offset];
1882            ZSTD_copy4(op+4, match);
1883            match -= dec64;
1884        } else { ZSTD_copy8(op, match); }
1885        op += 8; match += 8;
1886
1887        if (endMatch > oend-12)
1888        {
1889            if (op < oend-8)
1890            {
1891                ZSTD_wildcopy(op, match, (oend-8) - op);
1892                match += (oend-8) - op;
1893                op = oend-8;
1894            }
1895            while (op<endMatch) *op++ = *match++;
1896        }
1897        else
1898            ZSTD_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
1899
1900        /* restore, in case of overlap */
1901        if (overlapRisk) memcpy(endMatch, saved, qutt);
1902    }
1903
1904    return endMatch-ostart;
1905}
1906
1907typedef struct ZSTDv01_Dctx_s
1908{
1909    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1910    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1911    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1912    void* previousDstEnd;
1913    void* base;
1914    size_t expected;
1915    blockType_t bType;
1916    U32 phase;
1917} dctx_t;
1918
1919
1920static size_t ZSTD_decompressSequences(
1921                               void* ctx,
1922                               void* dst, size_t maxDstSize,
1923                         const void* seqStart, size_t seqSize,
1924                         const BYTE* litStart, size_t litSize)
1925{
1926    dctx_t* dctx = (dctx_t*)ctx;
1927    const BYTE* ip = (const BYTE*)seqStart;
1928    const BYTE* const iend = ip + seqSize;
1929    BYTE* const ostart = (BYTE* const)dst;
1930    BYTE* op = ostart;
1931    BYTE* const oend = ostart + maxDstSize;
1932    size_t errorCode, dumpsLength;
1933    const BYTE* litPtr = litStart;
1934    const BYTE* const litEnd = litStart + litSize;
1935    int nbSeq;
1936    const BYTE* dumps;
1937    U32* DTableLL = dctx->LLTable;
1938    U32* DTableML = dctx->MLTable;
1939    U32* DTableOffb = dctx->OffTable;
1940    BYTE* const base = (BYTE*) (dctx->base);
1941
1942    /* Build Decoding Tables */
1943    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
1944                                      DTableLL, DTableML, DTableOffb,
1945                                      ip, iend-ip);
1946    if (ZSTD_isError(errorCode)) return errorCode;
1947    ip += errorCode;
1948
1949    /* Regen sequences */
1950    {
1951        seq_t sequence;
1952        seqState_t seqState;
1953
1954        memset(&sequence, 0, sizeof(sequence));
1955        seqState.dumps = dumps;
1956        seqState.dumpsEnd = dumps + dumpsLength;
1957        seqState.prevOffset = 1;
1958        errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1959        if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_corruption;
1960        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1961        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1962        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1963
1964        for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1965        {
1966            size_t oneSeqSize;
1967            nbSeq--;
1968            ZSTD_decodeSequence(&sequence, &seqState);
1969            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1970            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1971            op += oneSeqSize;
1972        }
1973
1974        /* check if reached exact end */
1975        if ( !FSE_endOfDStream(&(seqState.DStream)) ) return (size_t)-ZSTD_ERROR_corruption;   /* requested too much : data is corrupted */
1976        if (nbSeq<0) return (size_t)-ZSTD_ERROR_corruption;   /* requested too many sequences : data is corrupted */
1977
1978        /* last literal segment */
1979        {
1980            size_t lastLLSize = litEnd - litPtr;
1981            if (op+lastLLSize > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1982            if (op != litPtr) memmove(op, litPtr, lastLLSize);
1983            op += lastLLSize;
1984        }
1985    }
1986
1987    return op-ostart;
1988}
1989
1990
1991static size_t ZSTD_decompressBlock(
1992                            void* ctx,
1993                            void* dst, size_t maxDstSize,
1994                      const void* src, size_t srcSize)
1995{
1996    /* blockType == blockCompressed, srcSize is trusted */
1997    const BYTE* ip = (const BYTE*)src;
1998    const BYTE* litPtr = NULL;
1999    size_t litSize = 0;
2000    size_t errorCode;
2001
2002    /* Decode literals sub-block */
2003    errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
2004    if (ZSTD_isError(errorCode)) return errorCode;
2005    ip += errorCode;
2006    srcSize -= errorCode;
2007
2008    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
2009}
2010
2011
2012size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2013{
2014    const BYTE* ip = (const BYTE*)src;
2015    const BYTE* iend = ip + srcSize;
2016    BYTE* const ostart = (BYTE* const)dst;
2017    BYTE* op = ostart;
2018    BYTE* const oend = ostart + maxDstSize;
2019    size_t remainingSize = srcSize;
2020    U32 magicNumber;
2021    size_t errorCode=0;
2022    blockProperties_t blockProperties;
2023
2024    /* Frame Header */
2025    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
2026    magicNumber = ZSTD_readBE32(src);
2027    if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
2028    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2029
2030    /* Loop on each block */
2031    while (1)
2032    {
2033        size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
2034        if (ZSTD_isError(blockSize)) return blockSize;
2035
2036        ip += ZSTD_blockHeaderSize;
2037        remainingSize -= ZSTD_blockHeaderSize;
2038        if (blockSize > remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
2039
2040        switch(blockProperties.blockType)
2041        {
2042        case bt_compressed:
2043            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
2044            break;
2045        case bt_raw :
2046            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
2047            break;
2048        case bt_rle :
2049            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet supported */
2050            break;
2051        case bt_end :
2052            /* end of frame */
2053            if (remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
2054            break;
2055        default:
2056            return (size_t)-ZSTD_ERROR_GENERIC;
2057        }
2058        if (blockSize == 0) break;   /* bt_end */
2059
2060        if (ZSTD_isError(errorCode)) return errorCode;
2061        op += errorCode;
2062        ip += blockSize;
2063        remainingSize -= blockSize;
2064    }
2065
2066    return op-ostart;
2067}
2068
2069size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2070{
2071    dctx_t ctx;
2072    ctx.base = dst;
2073    return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
2074}
2075
2076
2077/*******************************
2078*  Streaming Decompression API
2079*******************************/
2080
2081size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2082{
2083    dctx->expected = ZSTD_frameHeaderSize;
2084    dctx->phase = 0;
2085    dctx->previousDstEnd = NULL;
2086    dctx->base = NULL;
2087    return 0;
2088}
2089
2090ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2091{
2092    ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2093    if (dctx==NULL) return NULL;
2094    ZSTDv01_resetDCtx(dctx);
2095    return dctx;
2096}
2097
2098size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2099{
2100    free(dctx);
2101    return 0;
2102}
2103
2104size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2105{
2106    return ((dctx_t*)dctx)->expected;
2107}
2108
2109size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2110{
2111    dctx_t* ctx = (dctx_t*)dctx;
2112
2113    /* Sanity check */
2114    if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_SrcSize;
2115    if (dst != ctx->previousDstEnd)  /* not contiguous */
2116        ctx->base = dst;
2117
2118    /* Decompress : frame header */
2119    if (ctx->phase == 0)
2120    {
2121        /* Check frame magic header */
2122        U32 magicNumber = ZSTD_readBE32(src);
2123        if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
2124        ctx->phase = 1;
2125        ctx->expected = ZSTD_blockHeaderSize;
2126        return 0;
2127    }
2128
2129    /* Decompress : block header */
2130    if (ctx->phase == 1)
2131    {
2132        blockProperties_t bp;
2133        size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2134        if (ZSTD_isError(blockSize)) return blockSize;
2135        if (bp.blockType == bt_end)
2136        {
2137            ctx->expected = 0;
2138            ctx->phase = 0;
2139        }
2140        else
2141        {
2142            ctx->expected = blockSize;
2143            ctx->bType = bp.blockType;
2144            ctx->phase = 2;
2145        }
2146
2147        return 0;
2148    }
2149
2150    /* Decompress : block content */
2151    {
2152        size_t rSize;
2153        switch(ctx->bType)
2154        {
2155        case bt_compressed:
2156            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2157            break;
2158        case bt_raw :
2159            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2160            break;
2161        case bt_rle :
2162            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet handled */
2163            break;
2164        case bt_end :   /* should never happen (filtered at phase 1) */
2165            rSize = 0;
2166            break;
2167        default:
2168            return (size_t)-ZSTD_ERROR_GENERIC;
2169        }
2170        ctx->phase = 1;
2171        ctx->expected = ZSTD_blockHeaderSize;
2172        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
2173        return rSize;
2174    }
2175
2176}
2177
2178
Note: See TracBrowser for help on using the repository browser.