feat(dir2config): defaults

Mikaël Cluseau
2019-02-28 19:27:09 +11:00
parent d2b212ae6b
commit ea6fce68e1
383 changed files with 74236 additions and 41 deletions


@@ -0,0 +1,181 @@
package idxfile
import (
"bufio"
"bytes"
"errors"
"io"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
var (
// ErrUnsupportedVersion is returned by Decode when the idx file version
// is not supported.
ErrUnsupportedVersion = errors.New("Unsupported version")
// ErrMalformedIdxFile is returned by Decode when the idx file is corrupted.
ErrMalformedIdxFile = errors.New("Malformed IDX file")
)
const (
fanout = 256
objectIDLength = 20
)
// Decoder reads and decodes idx files from an input stream.
type Decoder struct {
*bufio.Reader
}
// NewDecoder builds a new idx stream decoder that reads from r.
func NewDecoder(r io.Reader) *Decoder {
return &Decoder{bufio.NewReader(r)}
}
// Decode reads from the stream and decodes the content into the MemoryIndex struct.
func (d *Decoder) Decode(idx *MemoryIndex) error {
if err := validateHeader(d); err != nil {
return err
}
flow := []func(*MemoryIndex, io.Reader) error{
readVersion,
readFanout,
readObjectNames,
readCRC32,
readOffsets,
readChecksums,
}
for _, f := range flow {
if err := f(idx, d); err != nil {
return err
}
}
return nil
}
func validateHeader(r io.Reader) error {
var h = make([]byte, 4)
if _, err := io.ReadFull(r, h); err != nil {
return err
}
if !bytes.Equal(h, idxHeader) {
return ErrMalformedIdxFile
}
return nil
}
func readVersion(idx *MemoryIndex, r io.Reader) error {
v, err := binary.ReadUint32(r)
if err != nil {
return err
}
if v > VersionSupported {
return ErrUnsupportedVersion
}
idx.Version = v
return nil
}
func readFanout(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
n, err := binary.ReadUint32(r)
if err != nil {
return err
}
idx.Fanout[k] = n
idx.FanoutMapping[k] = noMapping
}
return nil
}
func readObjectNames(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
var buckets uint32
if k == 0 {
buckets = idx.Fanout[k]
} else {
if idx.Fanout[k] < idx.Fanout[k-1] {
// a decreasing fanout table means the idx file is corrupted
return ErrMalformedIdxFile
}
buckets = idx.Fanout[k] - idx.Fanout[k-1]
}
if buckets == 0 {
continue
}
idx.FanoutMapping[k] = len(idx.Names)
nameLen := int(buckets * objectIDLength)
bin := make([]byte, nameLen)
if _, err := io.ReadFull(r, bin); err != nil {
return err
}
idx.Names = append(idx.Names, bin)
idx.Offset32 = append(idx.Offset32, make([]byte, buckets*4))
idx.CRC32 = append(idx.CRC32, make([]byte, buckets*4))
}
return nil
}
func readCRC32(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
if pos := idx.FanoutMapping[k]; pos != noMapping {
if _, err := io.ReadFull(r, idx.CRC32[pos]); err != nil {
return err
}
}
}
return nil
}
func readOffsets(idx *MemoryIndex, r io.Reader) error {
var o64cnt int
for k := 0; k < fanout; k++ {
if pos := idx.FanoutMapping[k]; pos != noMapping {
if _, err := io.ReadFull(r, idx.Offset32[pos]); err != nil {
return err
}
for p := 0; p < len(idx.Offset32[pos]); p += 4 {
if idx.Offset32[pos][p]&(byte(1)<<7) > 0 {
o64cnt++
}
}
}
}
if o64cnt > 0 {
idx.Offset64 = make([]byte, o64cnt*8)
if _, err := io.ReadFull(r, idx.Offset64); err != nil {
return err
}
}
return nil
}
func readChecksums(idx *MemoryIndex, r io.Reader) error {
if _, err := io.ReadFull(r, idx.PackfileChecksum[:]); err != nil {
return err
}
if _, err := io.ReadFull(r, idx.IdxChecksum[:]); err != nil {
return err
}
return nil
}
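
For orientation, a minimal sketch of driving this decoder from a file on disk follows; it is not part of the commit. The file name is a placeholder, and the idxfile import path is assumed to be its usual go-git v4 location.

package main

import (
    "fmt"
    "os"

    "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)

func main() {
    // Hypothetical idx file name; any io.Reader would do.
    f, err := os.Open("pack-example.idx")
    if err != nil {
        panic(err)
    }
    defer f.Close()

    idx := idxfile.NewMemoryIndex()
    if err := idxfile.NewDecoder(f).Decode(idx); err != nil {
        panic(err)
    }

    count, _ := idx.Count()
    fmt.Println("objects indexed:", count)
}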


@@ -0,0 +1,128 @@
// Package idxfile implements encoding and decoding of packfile idx files.
//
// == Original (version 1) pack-*.idx files have the following format:
//
// - The header consists of 256 4-byte network byte order
// integers. N-th entry of this table records the number of
// objects in the corresponding pack, the first byte of whose
// object name is less than or equal to N. This is called the
// 'first-level fan-out' table.
//
// - The header is followed by sorted 24-byte entries, one entry
// per object in the pack. Each entry is:
//
// 4-byte network byte order integer, recording where the
// object is stored in the packfile as the offset from the
// beginning.
//
// 20-byte object name.
//
// - The file is concluded with a trailer:
//
// A copy of the 20-byte SHA1 checksum at the end of
// corresponding packfile.
//
// 20-byte SHA1-checksum of all of the above.
//
// Pack Idx file:
//
// -- +--------------------------------+
// fanout | fanout[0] = 2 (for example) |-.
// table +--------------------------------+ |
// | fanout[1] | |
// +--------------------------------+ |
// | fanout[2] | |
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
// | fanout[255] = total objects |---.
// -- +--------------------------------+ | |
// main | offset | | |
// index | object name 00XXXXXXXXXXXXXXXX | | |
// tab +--------------------------------+ | |
// | offset | | |
// | object name 00XXXXXXXXXXXXXXXX | | |
// +--------------------------------+<+ |
// .-| offset | |
// | | object name 01XXXXXXXXXXXXXXXX | |
// | +--------------------------------+ |
// | | offset | |
// | | object name 01XXXXXXXXXXXXXXXX | |
// | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
// | | offset | |
// | | object name FFXXXXXXXXXXXXXXXX | |
// --| +--------------------------------+<--+
// trailer | | packfile checksum |
// | +--------------------------------+
// | | idxfile checksum |
// | +--------------------------------+
// .---------.
// |
// Pack file entry: <+
//
// packed object header:
// 1-byte size extension bit (MSB)
// type (next 3 bit)
// size0 (lower 4-bit)
// n-byte sizeN (as long as MSB is set, each 7-bit)
// size0..sizeN form 4+7+7+..+7 bit integer, size0
// is the least significant part, and sizeN is the
// most significant part.
// packed object data:
// If it is not DELTA, then deflated bytes (the size above
// is the size before compression).
// If it is REF_DELTA, then
// 20-byte base object name SHA1 (the size above is the
// size of the delta data that follows).
// delta data, deflated.
// If it is OFS_DELTA, then
// n-byte offset (see below) interpreted as a negative
// offset from the type-byte of the header of the
// ofs-delta entry (the size above is the size of
// the delta data that follows).
// delta data, deflated.
//
// offset encoding:
// n bytes with MSB set in all but the last one.
// The offset is then the number constructed by
// concatenating the lower 7 bit of each byte, and
// for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1))
// to the result.
//
// == Version 2 pack-*.idx files support packs larger than 4 GiB, and
// have some other reorganizations. They have the format:
//
// - A 4-byte magic number '\377tOc' which is an unreasonable
// fanout[0] value.
//
// - A 4-byte version number (= 2)
//
// - A 256-entry fan-out table just like v1.
//
// - A table of sorted 20-byte SHA1 object names. These are
// packed together without offset values to reduce the cache
// footprint of the binary search for a specific object name.
//
// - A table of 4-byte CRC32 values of the packed object data.
// This is new in v2 so compressed data can be copied directly
// from pack to pack during repacking without undetected
// data corruption.
//
// - A table of 4-byte offset values (in network byte order).
// These are usually 31-bit pack file offsets, but large
// offsets are encoded as an index into the next table with
// the msbit set.
//
// - A table of 8-byte offset entries (empty for pack files less
// than 2 GiB). Pack files are organized with heavily used
// objects toward the front, so most object references should
// not need to refer to this table.
//
// - The same trailer as a v1 pack file:
//
// A copy of the 20-byte SHA1 checksum at the end of
// corresponding packfile.
//
// 20-byte SHA1-checksum of all of the above.
//
// Source:
// https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-format.txt
package idxfile
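
As a worked illustration of the rules described above (not part of this commit): fanout[b] is cumulative, so the number of objects whose name starts with byte b is fanout[b] - fanout[b-1], and in a v2 file a 4-byte offset entry with its most significant bit set is not an offset at all but an index into the 8-byte offset table. A small sketch, with hypothetical helper names:

package example

// bucketSize returns how many objects have b as the first byte of their
// name, derived from the cumulative fanout table.
func bucketSize(fan [256]uint32, b byte) uint32 {
    if b == 0 {
        return fan[0]
    }
    return fan[b] - fan[b-1]
}

// resolveOffset applies the v2 large-offset rule: a value with the MSB clear
// is a direct pack offset, otherwise the low 31 bits index the 64-bit table.
func resolveOffset(ofs32 uint32, offset64 []uint64) uint64 {
    const msb = uint32(1) << 31
    if ofs32&msb == 0 {
        return uint64(ofs32)
    }
    return offset64[ofs32&^msb]
}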


@@ -0,0 +1,142 @@
package idxfile
import (
"crypto/sha1"
"hash"
"io"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
// Encoder writes MemoryIndex structs to an output stream.
type Encoder struct {
io.Writer
hash hash.Hash
}
// NewEncoder returns a new stream encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
h := sha1.New()
mw := io.MultiWriter(w, h)
return &Encoder{mw, h}
}
// Encode encodes a MemoryIndex into the encoder's writer.
func (e *Encoder) Encode(idx *MemoryIndex) (int, error) {
flow := []func(*MemoryIndex) (int, error){
e.encodeHeader,
e.encodeFanout,
e.encodeHashes,
e.encodeCRC32,
e.encodeOffsets,
e.encodeChecksums,
}
sz := 0
for _, f := range flow {
i, err := f(idx)
sz += i
if err != nil {
return sz, err
}
}
return sz, nil
}
func (e *Encoder) encodeHeader(idx *MemoryIndex) (int, error) {
c, err := e.Write(idxHeader)
if err != nil {
return c, err
}
return c + 4, binary.WriteUint32(e, idx.Version)
}
func (e *Encoder) encodeFanout(idx *MemoryIndex) (int, error) {
for _, c := range idx.Fanout {
if err := binary.WriteUint32(e, c); err != nil {
return 0, err
}
}
return fanout * 4, nil
}
func (e *Encoder) encodeHashes(idx *MemoryIndex) (int, error) {
var size int
for k := 0; k < fanout; k++ {
pos := idx.FanoutMapping[k]
if pos == noMapping {
continue
}
n, err := e.Write(idx.Names[pos])
if err != nil {
return size, err
}
size += n
}
return size, nil
}
func (e *Encoder) encodeCRC32(idx *MemoryIndex) (int, error) {
var size int
for k := 0; k < fanout; k++ {
pos := idx.FanoutMapping[k]
if pos == noMapping {
continue
}
n, err := e.Write(idx.CRC32[pos])
if err != nil {
return size, err
}
size += n
}
return size, nil
}
func (e *Encoder) encodeOffsets(idx *MemoryIndex) (int, error) {
var size int
for k := 0; k < fanout; k++ {
pos := idx.FanoutMapping[k]
if pos == noMapping {
continue
}
n, err := e.Write(idx.Offset32[pos])
if err != nil {
return size, err
}
size += n
}
if len(idx.Offset64) > 0 {
n, err := e.Write(idx.Offset64)
if err != nil {
return size, err
}
size += n
}
return size, nil
}
func (e *Encoder) encodeChecksums(idx *MemoryIndex) (int, error) {
if _, err := e.Write(idx.PackfileChecksum[:]); err != nil {
return 0, err
}
copy(idx.IdxChecksum[:], e.hash.Sum(nil)[:20])
if _, err := e.Write(idx.IdxChecksum[:]); err != nil {
return 0, err
}
return 40, nil
}
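
A short sketch of the reverse direction, writing a MemoryIndex back out through this encoder; the helper name and destination path are hypothetical, and the idxfile import path is assumed to be its usual go-git v4 location.

package example

import (
    "os"

    "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)

// writeIdx is a hypothetical helper that serializes idx to path.
// Encode returns the total number of bytes written, including the trailer.
func writeIdx(path string, idx *idxfile.MemoryIndex) (int, error) {
    f, err := os.Create(path)
    if err != nil {
        return 0, err
    }
    defer f.Close()

    return idxfile.NewEncoder(f).Encode(idx)
}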


@@ -0,0 +1,347 @@
package idxfile
import (
"bytes"
"io"
"sort"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
const (
// VersionSupported is the only idx version supported.
VersionSupported = 2
noMapping = -1
)
var (
idxHeader = []byte{255, 't', 'O', 'c'}
)
// Index represents an index of a packfile.
type Index interface {
// Contains checks whether the given hash is in the index.
Contains(h plumbing.Hash) (bool, error)
// FindOffset finds the offset in the packfile for the object with
// the given hash.
FindOffset(h plumbing.Hash) (int64, error)
// FindCRC32 finds the CRC32 of the object with the given hash.
FindCRC32(h plumbing.Hash) (uint32, error)
// FindHash finds the hash for the object with the given offset.
FindHash(o int64) (plumbing.Hash, error)
// Count returns the number of entries in the index.
Count() (int64, error)
// Entries returns an iterator to retrieve all index entries.
Entries() (EntryIter, error)
// EntriesByOffset returns an iterator to retrieve all index entries ordered
// by offset.
EntriesByOffset() (EntryIter, error)
}
// MemoryIndex is the in-memory representation of an idx file.
type MemoryIndex struct {
Version uint32
Fanout [256]uint32
// FanoutMapping maps the position in the fanout table to the position
// in the Names, Offset32 and CRC32 slices. This improves the memory
// usage by not needing an array with unnecessary empty slots.
FanoutMapping [256]int
Names [][]byte
Offset32 [][]byte
CRC32 [][]byte
Offset64 []byte
PackfileChecksum [20]byte
IdxChecksum [20]byte
offsetHash map[int64]plumbing.Hash
}
var _ Index = (*MemoryIndex)(nil)
// NewMemoryIndex returns an instance of a new MemoryIndex.
func NewMemoryIndex() *MemoryIndex {
return &MemoryIndex{}
}
func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) (int, bool) {
k := idx.FanoutMapping[h[0]]
if k == noMapping {
return 0, false
}
if len(idx.Names) <= k {
return 0, false
}
data := idx.Names[k]
high := uint64(len(idx.Offset32[k])) >> 2
if high == 0 {
return 0, false
}
low := uint64(0)
for {
mid := (low + high) >> 1
offset := mid * objectIDLength
cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength])
if cmp < 0 {
high = mid
} else if cmp == 0 {
return int(mid), true
} else {
low = mid + 1
}
if low >= high {
break
}
}
return 0, false
}
// Contains implements the Index interface.
func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) {
_, ok := idx.findHashIndex(h)
return ok, nil
}
// FindOffset implements the Index interface.
func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) {
if len(idx.FanoutMapping) <= int(h[0]) {
return 0, plumbing.ErrObjectNotFound
}
k := idx.FanoutMapping[h[0]]
i, ok := idx.findHashIndex(h)
if !ok {
return 0, plumbing.ErrObjectNotFound
}
return idx.getOffset(k, i)
}
const isO64Mask = uint64(1) << 31
func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) (int64, error) {
offset := secondLevel << 2
buf := bytes.NewBuffer(idx.Offset32[firstLevel][offset : offset+4])
ofs, err := binary.ReadUint32(buf)
if err != nil {
return -1, err
}
if (uint64(ofs) & isO64Mask) != 0 {
offset := 8 * (uint64(ofs) & ^isO64Mask)
buf := bytes.NewBuffer(idx.Offset64[offset : offset+8])
n, err := binary.ReadUint64(buf)
if err != nil {
return -1, err
}
return int64(n), nil
}
return int64(ofs), nil
}
// FindCRC32 implements the Index interface.
func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) {
k := idx.FanoutMapping[h[0]]
i, ok := idx.findHashIndex(h)
if !ok {
return 0, plumbing.ErrObjectNotFound
}
return idx.getCRC32(k, i)
}
func (idx *MemoryIndex) getCRC32(firstLevel, secondLevel int) (uint32, error) {
offset := secondLevel << 2
buf := bytes.NewBuffer(idx.CRC32[firstLevel][offset : offset+4])
return binary.ReadUint32(buf)
}
// FindHash implements the Index interface.
func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) {
// Lazily generate the reverse offset/hash map if required.
if idx.offsetHash == nil {
if err := idx.genOffsetHash(); err != nil {
return plumbing.ZeroHash, err
}
}
hash, ok := idx.offsetHash[o]
if !ok {
return plumbing.ZeroHash, plumbing.ErrObjectNotFound
}
return hash, nil
}
// genOffsetHash generates the offset/hash mapping for reverse search.
func (idx *MemoryIndex) genOffsetHash() error {
count, err := idx.Count()
if err != nil {
return err
}
idx.offsetHash = make(map[int64]plumbing.Hash, count)
iter, err := idx.Entries()
if err != nil {
return err
}
for {
entry, err := iter.Next()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
idx.offsetHash[int64(entry.Offset)] = entry.Hash
}
}
// Count implements the Index interface.
func (idx *MemoryIndex) Count() (int64, error) {
return int64(idx.Fanout[fanout-1]), nil
}
// Entries implements the Index interface.
func (idx *MemoryIndex) Entries() (EntryIter, error) {
return &idxfileEntryIter{idx, 0, 0, 0}, nil
}
// EntriesByOffset implements the Index interface.
func (idx *MemoryIndex) EntriesByOffset() (EntryIter, error) {
count, err := idx.Count()
if err != nil {
return nil, err
}
iter := &idxfileEntryOffsetIter{
entries: make(entriesByOffset, count),
}
entries, err := idx.Entries()
if err != nil {
return nil, err
}
for pos := 0; int64(pos) < count; pos++ {
entry, err := entries.Next()
if err != nil {
return nil, err
}
iter.entries[pos] = entry
}
sort.Sort(iter.entries)
return iter, nil
}
// EntryIter is an iterator that will return the entries in a packfile index.
type EntryIter interface {
// Next returns the next entry in the packfile index.
Next() (*Entry, error)
// Close closes the iterator.
Close() error
}
type idxfileEntryIter struct {
idx *MemoryIndex
total int
firstLevel, secondLevel int
}
func (i *idxfileEntryIter) Next() (*Entry, error) {
for {
if i.firstLevel >= fanout {
return nil, io.EOF
}
if i.total >= int(i.idx.Fanout[i.firstLevel]) {
i.firstLevel++
i.secondLevel = 0
continue
}
entry := new(Entry)
ofs := i.secondLevel * objectIDLength
copy(entry.Hash[:], i.idx.Names[i.idx.FanoutMapping[i.firstLevel]][ofs:])
pos := i.idx.FanoutMapping[entry.Hash[0]]
offset, err := i.idx.getOffset(pos, i.secondLevel)
if err != nil {
return nil, err
}
entry.Offset = uint64(offset)
entry.CRC32, err = i.idx.getCRC32(pos, i.secondLevel)
if err != nil {
return nil, err
}
i.secondLevel++
i.total++
return entry, nil
}
}
func (i *idxfileEntryIter) Close() error {
i.firstLevel = fanout
return nil
}
// Entry is the in-memory representation of an object entry in the idx file.
type Entry struct {
Hash plumbing.Hash
CRC32 uint32
Offset uint64
}
type idxfileEntryOffsetIter struct {
entries entriesByOffset
pos int
}
func (i *idxfileEntryOffsetIter) Next() (*Entry, error) {
if i.pos >= len(i.entries) {
return nil, io.EOF
}
entry := i.entries[i.pos]
i.pos++
return entry, nil
}
func (i *idxfileEntryOffsetIter) Close() error {
i.pos = len(i.entries) + 1
return nil
}
type entriesByOffset []*Entry
func (o entriesByOffset) Len() int {
return len(o)
}
func (o entriesByOffset) Less(i int, j int) bool {
return o[i].Offset < o[j].Offset
}
func (o entriesByOffset) Swap(i int, j int) {
o[i], o[j] = o[j], o[i]
}
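
Once decoded, lookups go through the Index interface above. A sketch of the typical calls, assuming idx has been populated (for example by Decoder.Decode) and h is a candidate object hash; the function name is hypothetical and the import paths are assumed.

package example

import (
    "fmt"

    "gopkg.in/src-d/go-git.v4/plumbing"
    "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)

// describeObject is a hypothetical helper showing the usual lookup calls.
func describeObject(idx *idxfile.MemoryIndex, h plumbing.Hash) {
    ok, _ := idx.Contains(h)
    if !ok {
        fmt.Println("object not in this pack")
        return
    }

    offset, err := idx.FindOffset(h)
    if err != nil {
        fmt.Println("offset lookup failed:", err)
        return
    }
    crc, _ := idx.FindCRC32(h)
    fmt.Printf("object %s at pack offset %d (crc32 %08x)\n", h, offset, crc)
}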


@@ -0,0 +1,186 @@
package idxfile
import (
"bytes"
"fmt"
"math"
"sort"
"sync"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
// objects implements sort.Interface, using the object hash as the sorting key.
type objects []Entry
// Writer implements a packfile Observer interface and is used to generate
// indexes.
type Writer struct {
m sync.Mutex
count uint32
checksum plumbing.Hash
objects objects
offset64 uint32
finished bool
index *MemoryIndex
added map[plumbing.Hash]struct{}
}
// Index returns a previously created MemoryIndex or creates a new one if
// needed.
func (w *Writer) Index() (*MemoryIndex, error) {
w.m.Lock()
defer w.m.Unlock()
if w.index == nil {
return w.createIndex()
}
return w.index, nil
}
// Add appends new object data.
func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) {
w.m.Lock()
defer w.m.Unlock()
if w.added == nil {
w.added = make(map[plumbing.Hash]struct{})
}
if _, ok := w.added[h]; !ok {
w.added[h] = struct{}{}
w.objects = append(w.objects, Entry{h, crc, pos})
}
}
// Finished reports whether the writer has already received the packfile footer.
func (w *Writer) Finished() bool {
return w.finished
}
// OnHeader implements packfile.Observer interface.
func (w *Writer) OnHeader(count uint32) error {
w.count = count
w.objects = make(objects, 0, count)
return nil
}
// OnInflatedObjectHeader implements packfile.Observer interface.
func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error {
return nil
}
// OnInflatedObjectContent implements packfile.Observer interface.
func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, _ []byte) error {
w.Add(h, uint64(pos), crc)
return nil
}
// OnFooter implements packfile.Observer interface.
func (w *Writer) OnFooter(h plumbing.Hash) error {
w.checksum = h
w.finished = true
_, err := w.createIndex()
if err != nil {
return err
}
return nil
}
// createIndex returns a MemoryIndex filled with the information gathered
// through the observer callbacks.
func (w *Writer) createIndex() (*MemoryIndex, error) {
if !w.finished {
return nil, fmt.Errorf("the index still hasn't finished building")
}
idx := new(MemoryIndex)
w.index = idx
sort.Sort(w.objects)
// unmap all fans by default
for i := range idx.FanoutMapping {
idx.FanoutMapping[i] = noMapping
}
buf := new(bytes.Buffer)
last := -1
bucket := -1
for i, o := range w.objects {
fan := o.Hash[0]
// fill the gaps between fans
for j := last + 1; j < int(fan); j++ {
idx.Fanout[j] = uint32(i)
}
// update the number of objects for this position
idx.Fanout[fan] = uint32(i + 1)
// when we move from one bucket to the next, update the counters and
// allocate memory for the new bucket
if last != int(fan) {
bucket++
idx.FanoutMapping[fan] = bucket
last = int(fan)
idx.Names = append(idx.Names, make([]byte, 0))
idx.Offset32 = append(idx.Offset32, make([]byte, 0))
idx.CRC32 = append(idx.CRC32, make([]byte, 0))
}
idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...)
offset := o.Offset
if offset > math.MaxInt32 {
offset = w.addOffset64(offset)
}
buf.Truncate(0)
binary.WriteUint32(buf, uint32(offset))
idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...)
buf.Truncate(0)
binary.WriteUint32(buf, uint32(o.CRC32))
idx.CRC32[bucket] = append(idx.CRC32[bucket], buf.Bytes()...)
}
for j := last + 1; j < 256; j++ {
idx.Fanout[j] = uint32(len(w.objects))
}
idx.Version = VersionSupported
idx.PackfileChecksum = w.checksum
return idx, nil
}
func (w *Writer) addOffset64(pos uint64) uint64 {
buf := new(bytes.Buffer)
binary.WriteUint64(buf, pos)
w.index.Offset64 = append(w.index.Offset64, buf.Bytes()...)
index := uint64(w.offset64 | (1 << 31))
w.offset64++
return index
}
func (o objects) Len() int {
return len(o)
}
func (o objects) Less(i int, j int) bool {
cmp := bytes.Compare(o[i].Hash[:], o[j].Hash[:])
return cmp < 0
}
func (o objects) Swap(i int, j int) {
o[i], o[j] = o[j], o[i]
}
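
In go-git the Writer is normally registered as a packfile Observer and fed by the parser; the hand-driven sketch below exercises the same flow directly. It is not part of the commit: hashes, offsets, CRCs and the zero packfile checksum are placeholder values, and the import paths are assumed.

package example

import (
    "gopkg.in/src-d/go-git.v4/plumbing"
    "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)

// buildIndex drives the observer callbacks by hand with placeholder data.
func buildIndex() (*idxfile.MemoryIndex, error) {
    w := new(idxfile.Writer)
    if err := w.OnHeader(2); err != nil { // the pack claims two objects
        return nil, err
    }
    w.Add(plumbing.NewHash("0000000000000000000000000000000000000001"), 12, 0xdeadbeef)
    w.Add(plumbing.NewHash("0000000000000000000000000000000000000002"), 345, 0xcafebabe)

    // OnFooter marks the writer as finished and builds the MemoryIndex.
    if err := w.OnFooter(plumbing.ZeroHash); err != nil {
        return nil, err
    }
    return w.Index()
}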