inits/vendor/github.com/ulikunitz/xz/format.go
Mikaël Cluseau 21d3f45969 vendor
2018-07-06 19:13:18 +11:00

729 lines
16 KiB
Go

// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xz
import (
"bytes"
"crypto/sha256"
"errors"
"fmt"
"hash"
"hash/crc32"
"io"
"github.com/ulikunitz/xz/lzma"
)
// allZeros checks whether a given byte slice has only zeros.
func allZeros(p []byte) bool {
for _, c := range p {
if c != 0 {
return false
}
}
return true
}
// padLen returns the length of the padding required for the given
// argument.
func padLen(n int64) int {
k := int(n % 4)
if k > 0 {
k = 4 - k
}
return k
}
/*** Header ***/
// headerMagic stores the magic bytes for the header
var headerMagic = []byte{0xfd, '7', 'z', 'X', 'Z', 0x00}
// HeaderLen provides the length of the xz file header.
const HeaderLen = 12
// Constants for the checksum methods supported by xz.
const (
CRC32 byte = 0x1
CRC64 = 0x4
SHA256 = 0xa
)
// errInvalidFlags indicates that flags are invalid.
var errInvalidFlags = errors.New("xz: invalid flags")
// verifyFlags returns the error errInvalidFlags if the value is
// invalid.
func verifyFlags(flags byte) error {
switch flags {
case CRC32, CRC64, SHA256:
return nil
default:
return errInvalidFlags
}
}
// flagstrings maps flag values to strings.
var flagstrings = map[byte]string{
CRC32: "CRC-32",
CRC64: "CRC-64",
SHA256: "SHA-256",
}
// flagString returns the string representation for the given flags.
func flagString(flags byte) string {
s, ok := flagstrings[flags]
if !ok {
return "invalid"
}
return s
}
// newHashFunc returns a function that creates hash instances for the
// hash method encoded in flags.
func newHashFunc(flags byte) (newHash func() hash.Hash, err error) {
switch flags {
case CRC32:
newHash = newCRC32
case CRC64:
newHash = newCRC64
case SHA256:
newHash = sha256.New
default:
err = errInvalidFlags
}
return
}
// header provides the actual content of the xz file header: the flags.
type header struct {
flags byte
}
// Errors returned by readHeader.
var errHeaderMagic = errors.New("xz: invalid header magic bytes")
// ValidHeader checks whether data is a correct xz file header. The
// length of data must be HeaderLen.
func ValidHeader(data []byte) bool {
var h header
err := h.UnmarshalBinary(data)
return err == nil
}
// String returns a string representation of the flags.
func (h header) String() string {
return flagString(h.flags)
}
// UnmarshalBinary reads header from the provided data slice.
func (h *header) UnmarshalBinary(data []byte) error {
// header length
if len(data) != HeaderLen {
return errors.New("xz: wrong file header length")
}
// magic header
if !bytes.Equal(headerMagic, data[:6]) {
return errHeaderMagic
}
// checksum
crc := crc32.NewIEEE()
crc.Write(data[6:8])
if uint32LE(data[8:]) != crc.Sum32() {
return errors.New("xz: invalid checksum for file header")
}
// stream flags
if data[6] != 0 {
return errInvalidFlags
}
flags := data[7]
if err := verifyFlags(flags); err != nil {
return err
}
h.flags = flags
return nil
}
// MarshalBinary generates the xz file header.
func (h *header) MarshalBinary() (data []byte, err error) {
if err = verifyFlags(h.flags); err != nil {
return nil, err
}
data = make([]byte, 12)
copy(data, headerMagic)
data[7] = h.flags
crc := crc32.NewIEEE()
crc.Write(data[6:8])
putUint32LE(data[8:], crc.Sum32())
return data, nil
}
/*** Footer ***/
// footerLen defines the length of the footer.
const footerLen = 12
// footerMagic contains the footer magic bytes.
var footerMagic = []byte{'Y', 'Z'}
// footer represents the content of the xz file footer.
type footer struct {
indexSize int64
flags byte
}
// String prints a string representation of the footer structure.
func (f footer) String() string {
return fmt.Sprintf("%s index size %d", flagString(f.flags), f.indexSize)
}
// Minimum and maximum for the size of the index (backward size).
const (
minIndexSize = 4
maxIndexSize = (1 << 32) * 4
)
// MarshalBinary converts footer values into an xz file footer. Note
// that the footer value is checked for correctness.
func (f *footer) MarshalBinary() (data []byte, err error) {
if err = verifyFlags(f.flags); err != nil {
return nil, err
}
if !(minIndexSize <= f.indexSize && f.indexSize <= maxIndexSize) {
return nil, errors.New("xz: index size out of range")
}
if f.indexSize%4 != 0 {
return nil, errors.New(
"xz: index size not aligned to four bytes")
}
data = make([]byte, footerLen)
// backward size (index size)
s := (f.indexSize / 4) - 1
putUint32LE(data[4:], uint32(s))
// flags
data[9] = f.flags
// footer magic
copy(data[10:], footerMagic)
// CRC-32
crc := crc32.NewIEEE()
crc.Write(data[4:10])
putUint32LE(data, crc.Sum32())
return data, nil
}
// UnmarshalBinary sets the footer value by unmarshalling an xz file
// footer.
func (f *footer) UnmarshalBinary(data []byte) error {
if len(data) != footerLen {
return errors.New("xz: wrong footer length")
}
// magic bytes
if !bytes.Equal(data[10:], footerMagic) {
return errors.New("xz: footer magic invalid")
}
// CRC-32
crc := crc32.NewIEEE()
crc.Write(data[4:10])
if uint32LE(data) != crc.Sum32() {
return errors.New("xz: footer checksum error")
}
var g footer
// backward size (index size)
g.indexSize = (int64(uint32LE(data[4:])) + 1) * 4
// flags
if data[8] != 0 {
return errInvalidFlags
}
g.flags = data[9]
if err := verifyFlags(g.flags); err != nil {
return err
}
*f = g
return nil
}
/*** Block Header ***/
// blockHeader represents the content of an xz block header.
type blockHeader struct {
compressedSize int64
uncompressedSize int64
filters []filter
}
// String converts the block header into a string.
func (h blockHeader) String() string {
var buf bytes.Buffer
first := true
if h.compressedSize >= 0 {
fmt.Fprintf(&buf, "compressed size %d", h.compressedSize)
first = false
}
if h.uncompressedSize >= 0 {
if !first {
buf.WriteString(" ")
}
fmt.Fprintf(&buf, "uncompressed size %d", h.uncompressedSize)
first = false
}
for _, f := range h.filters {
if !first {
buf.WriteString(" ")
}
fmt.Fprintf(&buf, "filter %s", f)
first = false
}
return buf.String()
}
// Masks for the block flags.
const (
filterCountMask = 0x03
compressedSizePresent = 0x40
uncompressedSizePresent = 0x80
reservedBlockFlags = 0x3C
)
// errIndexIndicator signals that an index indicator (0x00) has been found
// instead of an expected block header indicator.
var errIndexIndicator = errors.New("xz: found index indicator")
// readBlockHeader reads the block header.
func readBlockHeader(r io.Reader) (h *blockHeader, n int, err error) {
var buf bytes.Buffer
buf.Grow(20)
// block header size
z, err := io.CopyN(&buf, r, 1)
n = int(z)
if err != nil {
return nil, n, err
}
s := buf.Bytes()[0]
if s == 0 {
return nil, n, errIndexIndicator
}
// read complete header
headerLen := (int(s) + 1) * 4
buf.Grow(headerLen - 1)
z, err = io.CopyN(&buf, r, int64(headerLen-1))
n += int(z)
if err != nil {
return nil, n, err
}
// unmarshal block header
h = new(blockHeader)
if err = h.UnmarshalBinary(buf.Bytes()); err != nil {
return nil, n, err
}
return h, n, nil
}
// readSizeInBlockHeader reads the uncompressed or compressed size
// fields in the block header. The present value informs the function
// whether the respective field is actually present in the header.
func readSizeInBlockHeader(r io.ByteReader, present bool) (n int64, err error) {
if !present {
return -1, nil
}
x, _, err := readUvarint(r)
if err != nil {
return 0, err
}
if x >= 1<<63 {
return 0, errors.New("xz: size overflow in block header")
}
return int64(x), nil
}
// UnmarshalBinary unmarshals the block header.
func (h *blockHeader) UnmarshalBinary(data []byte) error {
// Check header length
s := data[0]
if data[0] == 0 {
return errIndexIndicator
}
headerLen := (int(s) + 1) * 4
if len(data) != headerLen {
return fmt.Errorf("xz: data length %d; want %d", len(data),
headerLen)
}
n := headerLen - 4
// Check CRC-32
crc := crc32.NewIEEE()
crc.Write(data[:n])
if crc.Sum32() != uint32LE(data[n:]) {
return errors.New("xz: checksum error for block header")
}
// Block header flags
flags := data[1]
if flags&reservedBlockFlags != 0 {
return errors.New("xz: reserved block header flags set")
}
r := bytes.NewReader(data[2:n])
// Compressed size
var err error
h.compressedSize, err = readSizeInBlockHeader(
r, flags&compressedSizePresent != 0)
if err != nil {
return err
}
// Uncompressed size
h.uncompressedSize, err = readSizeInBlockHeader(
r, flags&uncompressedSizePresent != 0)
if err != nil {
return err
}
h.filters, err = readFilters(r, int(flags&filterCountMask)+1)
if err != nil {
return err
}
// Check padding
// Since headerLen is a multiple of 4 we don't need to check
// alignment.
k := r.Len()
// The standard spec says that the padding should have not more
// than 3 bytes. However we found paddings of 4 or 5 in the
// wild. See https://github.com/ulikunitz/xz/pull/11 and
// https://github.com/ulikunitz/xz/issues/15
//
// The only reasonable approach seems to be to ignore the
// padding size. We still check that all padding bytes are zero.
if !allZeros(data[n-k : n]) {
return errPadding
}
return nil
}
// MarshalBinary marshals the binary header.
func (h *blockHeader) MarshalBinary() (data []byte, err error) {
if !(minFilters <= len(h.filters) && len(h.filters) <= maxFilters) {
return nil, errors.New("xz: filter count wrong")
}
for i, f := range h.filters {
if i < len(h.filters)-1 {
if f.id() == lzmaFilterID {
return nil, errors.New(
"xz: LZMA2 filter is not the last")
}
} else {
// last filter
if f.id() != lzmaFilterID {
return nil, errors.New("xz: " +
"last filter must be the LZMA2 filter")
}
}
}
var buf bytes.Buffer
// header size must set at the end
buf.WriteByte(0)
// flags
flags := byte(len(h.filters) - 1)
if h.compressedSize >= 0 {
flags |= compressedSizePresent
}
if h.uncompressedSize >= 0 {
flags |= uncompressedSizePresent
}
buf.WriteByte(flags)
p := make([]byte, 10)
if h.compressedSize >= 0 {
k := putUvarint(p, uint64(h.compressedSize))
buf.Write(p[:k])
}
if h.uncompressedSize >= 0 {
k := putUvarint(p, uint64(h.uncompressedSize))
buf.Write(p[:k])
}
for _, f := range h.filters {
fp, err := f.MarshalBinary()
if err != nil {
return nil, err
}
buf.Write(fp)
}
// padding
for i := padLen(int64(buf.Len())); i > 0; i-- {
buf.WriteByte(0)
}
// crc place holder
buf.Write(p[:4])
data = buf.Bytes()
if len(data)%4 != 0 {
panic("data length not aligned")
}
s := len(data)/4 - 1
if !(1 < s && s <= 255) {
panic("wrong block header size")
}
data[0] = byte(s)
crc := crc32.NewIEEE()
crc.Write(data[:len(data)-4])
putUint32LE(data[len(data)-4:], crc.Sum32())
return data, nil
}
// Constants used for marshalling and unmarshalling filters in the xz
// block header.
const (
minFilters = 1
maxFilters = 4
minReservedID = 1 << 62
)
// filter represents a filter in the block header.
type filter interface {
id() uint64
UnmarshalBinary(data []byte) error
MarshalBinary() (data []byte, err error)
reader(r io.Reader, c *ReaderConfig) (fr io.Reader, err error)
writeCloser(w io.WriteCloser, c *WriterConfig) (fw io.WriteCloser, err error)
// filter must be last filter
last() bool
}
// readFilter reads a block filter from the block header. At this point
// in time only the LZMA2 filter is supported.
func readFilter(r io.Reader) (f filter, err error) {
br := lzma.ByteReader(r)
// index
id, _, err := readUvarint(br)
if err != nil {
return nil, err
}
var data []byte
switch id {
case lzmaFilterID:
data = make([]byte, lzmaFilterLen)
data[0] = lzmaFilterID
if _, err = io.ReadFull(r, data[1:]); err != nil {
return nil, err
}
f = new(lzmaFilter)
default:
if id >= minReservedID {
return nil, errors.New(
"xz: reserved filter id in block stream header")
}
return nil, errors.New("xz: invalid filter id")
}
if err = f.UnmarshalBinary(data); err != nil {
return nil, err
}
return f, err
}
// readFilters reads count filters. At this point in time only the count
// 1 is supported.
func readFilters(r io.Reader, count int) (filters []filter, err error) {
if count != 1 {
return nil, errors.New("xz: unsupported filter count")
}
f, err := readFilter(r)
if err != nil {
return nil, err
}
return []filter{f}, err
}
// writeFilters writes the filters.
func writeFilters(w io.Writer, filters []filter) (n int, err error) {
for _, f := range filters {
p, err := f.MarshalBinary()
if err != nil {
return n, err
}
k, err := w.Write(p)
n += k
if err != nil {
return n, err
}
}
return n, nil
}
/*** Index ***/
// record describes a block in the xz file index.
type record struct {
unpaddedSize int64
uncompressedSize int64
}
// readRecord reads an index record.
func readRecord(r io.ByteReader) (rec record, n int, err error) {
u, k, err := readUvarint(r)
n += k
if err != nil {
return rec, n, err
}
rec.unpaddedSize = int64(u)
if rec.unpaddedSize < 0 {
return rec, n, errors.New("xz: unpadded size negative")
}
u, k, err = readUvarint(r)
n += k
if err != nil {
return rec, n, err
}
rec.uncompressedSize = int64(u)
if rec.uncompressedSize < 0 {
return rec, n, errors.New("xz: uncompressed size negative")
}
return rec, n, nil
}
// MarshalBinary converts an index record in its binary encoding.
func (rec *record) MarshalBinary() (data []byte, err error) {
// maximum length of a uvarint is 10
p := make([]byte, 20)
n := putUvarint(p, uint64(rec.unpaddedSize))
n += putUvarint(p[n:], uint64(rec.uncompressedSize))
return p[:n], nil
}
// writeIndex writes the index, a sequence of records.
func writeIndex(w io.Writer, index []record) (n int64, err error) {
crc := crc32.NewIEEE()
mw := io.MultiWriter(w, crc)
// index indicator
k, err := mw.Write([]byte{0})
n += int64(k)
if err != nil {
return n, err
}
// number of records
p := make([]byte, 10)
k = putUvarint(p, uint64(len(index)))
k, err = mw.Write(p[:k])
n += int64(k)
if err != nil {
return n, err
}
// list of records
for _, rec := range index {
p, err := rec.MarshalBinary()
if err != nil {
return n, err
}
k, err = mw.Write(p)
n += int64(k)
if err != nil {
return n, err
}
}
// index padding
k, err = mw.Write(make([]byte, padLen(int64(n))))
n += int64(k)
if err != nil {
return n, err
}
// crc32 checksum
putUint32LE(p, crc.Sum32())
k, err = w.Write(p[:4])
n += int64(k)
return n, err
}
// readIndexBody reads the index from the reader. It assumes that the
// index indicator has already been read.
func readIndexBody(r io.Reader) (records []record, n int64, err error) {
crc := crc32.NewIEEE()
// index indicator
crc.Write([]byte{0})
br := lzma.ByteReader(io.TeeReader(r, crc))
// number of records
u, k, err := readUvarint(br)
n += int64(k)
if err != nil {
return nil, n, err
}
recLen := int(u)
if recLen < 0 || uint64(recLen) != u {
return nil, n, errors.New("xz: record number overflow")
}
// list of records
records = make([]record, recLen)
for i := range records {
records[i], k, err = readRecord(br)
n += int64(k)
if err != nil {
return nil, n, err
}
}
p := make([]byte, padLen(int64(n+1)), 4)
k, err = io.ReadFull(br.(io.Reader), p)
n += int64(k)
if err != nil {
return nil, n, err
}
if !allZeros(p) {
return nil, n, errors.New("xz: non-zero byte in index padding")
}
// crc32
s := crc.Sum32()
p = p[:4]
k, err = io.ReadFull(br.(io.Reader), p)
n += int64(k)
if err != nil {
return records, n, err
}
if uint32LE(p) != s {
return nil, n, errors.New("xz: wrong checksum for index")
}
return records, n, nil
}