forgejo/vendor/github.com/dsnet/compress/internal/prefix/decoder.go

// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package prefix

import (
	"sort"

	"github.com/dsnet/compress/internal"
)

// The algorithm used to decode variable length codes is based on the lookup
// method in zlib. If the code is less-than-or-equal to maxChunkBits,
// then the symbol can be decoded using a single lookup into the chunks table.
// Otherwise, the links table will be used for a second level lookup.
//
// The chunks slice is keyed by the contents of the bit buffer ANDed with
// the chunkMask to avoid an out-of-bounds lookup. The value of chunks is a
// tuple that is decoded as follows:
//
//	var length = chunks[bitBuffer&chunkMask] & countMask
//	var symbol = chunks[bitBuffer&chunkMask] >> countBits
//
// If the decoded length is larger than chunkBits, then an overflow link table
// must be used for further decoding. In this case, the symbol is actually the
// index into the links tables. The second-level links table returned is
// processed in the same way as the chunks table.
//
//	if length > chunkBits {
//		var index = symbol // Previous symbol is index into links tables
//		length = links[index][bitBuffer>>chunkBits & linkMask] & countMask
//		symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits
//	}
//
// See the following:
//	http://www.gzip.org/algorithm.txt

// Decoder holds the two-level lookup tables used to decode prefix codes.
// The tables are built by Init. The first-level chunks table is indexed by the
// low bits of the bit buffer; entries whose length exceeds chunkBits refer to
// one of the second-level links tables instead of a symbol.
type Decoder struct {
	chunks    []uint32   // First-level lookup map
	links     [][]uint32 // Second-level lookup map
	chunkMask uint32     // Mask the length of the chunks table
	linkMask  uint32     // Mask the length of the link table
	chunkBits uint32     // Bit-length of the chunks table

	// MinBits is the minimum number of bits to safely make progress. Init sets
	// it to the shortest code length (it stays zero for the special-case
	// trees of zero or one code).
	MinBits uint32
	// NumSyms is the number of symbols, i.e. len(codes) as passed to Init.
	NumSyms uint32
}

// Init initializes Decoder according to the codes provided.
// Init builds the decoder's lookup tables from codes, reusing any backing
// storage the Decoder already owns.
//
// An empty codes slice yields an empty decoder, and a single code of
// bit-length zero yields a one-entry table; any other single-code input
// panics. When internal.Debug is set, Init also panics if codes is not sorted
// by symbol or does not pass the checkLengths/checkPrefixes validation.
func (pd *Decoder) Init(codes PrefixCodes) {
	// Degenerate trees are handled up front and never touch the link tables.
	switch len(codes) {
	case 0:
		// Empty tree (should error if used later).
		*pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0}
		return
	case 1:
		// Only a single code with a bit-length of zero is acceptable here.
		if codes[0].Len != 0 {
			panic("invalid codes")
		}
		// The length field of the entry is zero, so only the symbol is stored.
		pd.chunks = append(pd.chunks[:0], codes[0].Sym<<countBits)
		*pd = Decoder{chunks: pd.chunks[:1], links: pd.links[:0], NumSyms: 1}
		return
	}

	if internal.Debug {
		if !sort.IsSorted(prefixCodesBySymbol(codes)) {
			panic("input codes is not sorted")
		}
		if !codes.checkLengths() || !codes.checkPrefixes() {
			panic("detected incomplete or overlapping codes")
		}
	}

	// Find the shortest and longest code lengths.
	var shortest, longest uint32 = valueBits, 0
	for i := range codes {
		n := codes[i].Len
		if n < shortest {
			shortest = n
		}
		if n > longest {
			longest = n
		}
	}

	// Size the first-level table: as wide as the longest code, capped at
	// maxChunkBits.
	const maxChunkBits = 9 // This can be tuned for better performance
	rootBits := longest
	if rootBits > maxChunkBits {
		rootBits = maxChunkBits
	}
	rootSize := 1 << rootBits

	pd.NumSyms = uint32(len(codes))
	pd.MinBits = shortest
	pd.chunkBits = rootBits
	pd.chunkMask = uint32(rootSize - 1)
	pd.chunks = allocUint32s(pd.chunks, rootSize)

	// Second-level tables are only required when some code is longer than
	// the first-level table is wide.
	pd.linkMask = 0
	pd.links = pd.links[:0]
	if longest > rootBits {
		subSize := 1 << (longest - rootBits)
		pd.linkMask = uint32(subSize - 1)

		// A zero entry marks a first-level slot with no link assigned yet.
		for i := range pd.chunks {
			pd.chunks[i] = 0
		}

		// Hand out one link table per distinct first-level prefix of the
		// long codes. The stored length (rootBits+1) is what flags the entry
		// as a link rather than a symbol, and keeps the entry non-zero.
		var numTables uint32
		for _, c := range codes {
			if c.Len <= rootBits {
				continue
			}
			slot := c.Val & pd.chunkMask
			if pd.chunks[slot] != 0 {
				continue
			}
			pd.chunks[slot] = numTables<<countBits | (rootBits + 1)
			numTables++
		}

		// Carve every link table out of a single flat allocation.
		pd.links = extendSliceUint32s(pd.links, int(numTables))
		flat := allocUint32s(pd.links[0], subSize*int(numTables))
		for i := range pd.links {
			lo := i * subSize
			pd.links[i] = flat[lo : lo+subSize]
		}
	}

	// Populate the tables. Each code is replicated into every slot whose low
	// bits match the code value, stepping by 2^(bits consumed at that level).
	for _, c := range codes {
		entry := c.Sym<<countBits | c.Len

		// Short codes live directly in the first-level table.
		table, start, width := pd.chunks, int(c.Val), uint(c.Len)
		if c.Len > rootBits {
			// Long codes go into the link table selected by their low bits.
			table = pd.links[pd.chunks[c.Val&pd.chunkMask]>>countBits]
			start = int(c.Val >> rootBits)
			width = uint(c.Len - rootBits)
		}

		stride := 1 << width
		for j := start; j < len(table); j += stride {
			table[j] = entry
		}
	}
}
Dump: add output format tar and output to stdout (#10376) * Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net> 2020-06-05 16:47:39 -04:00			`// Copyright 2015, Joe Tsai. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE.md file.`

			`package prefix`

			`import (`
			`"sort"`

			`"github.com/dsnet/compress/internal"`
			`)`

			`// The algorithm used to decode variable length codes is based on the lookup`
			`// method in zlib. If the code is less-than-or-equal to maxChunkBits,`
			`// then the symbol can be decoded using a single lookup into the chunks table.`
			`// Otherwise, the links table will be used for a second level lookup.`
			`//`
			`// The chunks slice is keyed by the contents of the bit buffer ANDed with`
			`// the chunkMask to avoid a out-of-bounds lookup. The value of chunks is a tuple`
			`// that is decoded as follow:`
			`//`
			`// var length = chunks[bitBuffer&chunkMask] & countMask`
			`// var symbol = chunks[bitBuffer&chunkMask] >> countBits`
			`//`
			`// If the decoded length is larger than chunkBits, then an overflow link table`
			`// must be used for further decoding. In this case, the symbol is actually the`
			`// index into the links tables. The second-level links table returned is`
			`// processed in the same way as the chunks table.`
			`//`
			`// if length > chunkBits {`
			`// var index = symbol // Previous symbol is index into links tables`
			`// length = links[index][bitBuffer>>chunkBits & linkMask] & countMask`
			`// symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits`
			`// }`
			`//`
			`// See the following:`
			`// http://www.gzip.org/algorithm.txt`

			`type Decoder struct {`
			`chunks []uint32 // First-level lookup map`
			`links [][]uint32 // Second-level lookup map`
			`chunkMask uint32 // Mask the length of the chunks table`
			`linkMask uint32 // Mask the length of the link table`
			`chunkBits uint32 // Bit-length of the chunks table`

			`MinBits uint32 // The minimum number of bits to safely make progress`
			`NumSyms uint32 // Number of symbols`
			`}`

			`// Init initializes Decoder according to the codes provided.`
			`func (pd *Decoder) Init(codes PrefixCodes) {`
			`// Handle special case trees.`
			`if len(codes) <= 1 {`
			`switch {`
			`case len(codes) == 0: // Empty tree (should error if used later)`
			`*pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0}`
			`case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero)`
			`pd.chunks = append(pd.chunks[:0], codes[0].Sym<<countBits\|0)`
			`*pd = Decoder{chunks: pd.chunks[:1], links: pd.links[:0], NumSyms: 1}`
			`default:`
			`panic("invalid codes")`
			`}`
			`return`
			`}`
			`if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) {`
			`panic("input codes is not sorted")`
			`}`
			`if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) {`
			`panic("detected incomplete or overlapping codes")`
			`}`

			`var minBits, maxBits uint32 = valueBits, 0`
			`for _, c := range codes {`
			`if minBits > c.Len {`
			`minBits = c.Len`
			`}`
			`if maxBits < c.Len {`
			`maxBits = c.Len`
			`}`
			`}`

			`// Allocate chunks table as needed.`
			`const maxChunkBits = 9 // This can be tuned for better performance`
			`pd.NumSyms = uint32(len(codes))`
			`pd.MinBits = minBits`
			`pd.chunkBits = maxBits`
			`if pd.chunkBits > maxChunkBits {`
			`pd.chunkBits = maxChunkBits`
			`}`
			`numChunks := 1 << pd.chunkBits`
			`pd.chunks = allocUint32s(pd.chunks, numChunks)`
			`pd.chunkMask = uint32(numChunks - 1)`

			`// Allocate links tables as needed.`
			`pd.links = pd.links[:0]`
			`pd.linkMask = 0`
			`if pd.chunkBits < maxBits {`
			`numLinks := 1 << (maxBits - pd.chunkBits)`
			`pd.linkMask = uint32(numLinks - 1)`

			`var linkIdx uint32`
			`for i := range pd.chunks {`
			`pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized`
			`}`
			`for _, c := range codes {`
			`if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 {`
			`pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) \| (pd.chunkBits + 1)`
			`linkIdx++`
			`}`
			`}`

			`pd.links = extendSliceUint32s(pd.links, int(linkIdx))`
			`linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx))`
			`for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks {`
			`pd.links[i] = linksFlat[j : j+numLinks]`
			`}`
			`}`

			`// Fill out chunks and links tables with values.`
			`for _, c := range codes {`
			`chunk := c.Sym<<countBits \| c.Len`
			`if c.Len <= pd.chunkBits {`
			`skip := 1 << uint(c.Len)`
			`for j := int(c.Val); j < len(pd.chunks); j += skip {`
			`pd.chunks[j] = chunk`
			`}`
			`} else {`
			`linkIdx := pd.chunks[c.Val&pd.chunkMask] >> countBits`
			`links := pd.links[linkIdx]`
			`skip := 1 << uint(c.Len-pd.chunkBits)`
			`for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip {`
			`links[j] = chunk`
			`}`
			`}`
			`}`
			`}`