feat(dir2config): defaults

Mikaël Cluseau
2019-02-28 19:27:09 +11:00
parent d2b212ae6b
commit ea6fce68e1
383 changed files with 74236 additions and 41 deletions

View File

@@ -0,0 +1,99 @@
package config
// New creates a new config instance.
func New() *Config {
return &Config{}
}
// Config contains all the sections, comments and includes from a config file.
type Config struct {
Comment *Comment
Sections Sections
Includes Includes
}
// Includes is a list of Includes in a config file.
type Includes []*Include
// Include is a reference to an included config file.
type Include struct {
Path string
Config *Config
}
// Comment is a comment string, stored without the '#' or ';' prefix.
type Comment string
const (
// NoSubsection token is passed to Config.Section and Config.SetSection to
// represent the absence of a section.
NoSubsection = ""
)
// Section returns an existing section with the given name or creates a new one.
func (c *Config) Section(name string) *Section {
for i := len(c.Sections) - 1; i >= 0; i-- {
s := c.Sections[i]
if s.IsName(name) {
return s
}
}
s := &Section{Name: name}
c.Sections = append(c.Sections, s)
return s
}
// AddOption adds an option to a given section and subsection. Use the
// NoSubsection constant for the subsection argument if no subsection is wanted.
func (c *Config) AddOption(section string, subsection string, key string, value string) *Config {
if subsection == "" {
c.Section(section).AddOption(key, value)
} else {
c.Section(section).Subsection(subsection).AddOption(key, value)
}
return c
}
// SetOption sets an option to a given section and subsection. Use the
// NoSubsection constant for the subsection argument if no subsection is wanted.
func (c *Config) SetOption(section string, subsection string, key string, value string) *Config {
if subsection == "" {
c.Section(section).SetOption(key, value)
} else {
c.Section(section).Subsection(subsection).SetOption(key, value)
}
return c
}
// RemoveSection removes a section from a config file.
func (c *Config) RemoveSection(name string) *Config {
result := Sections{}
for _, s := range c.Sections {
if !s.IsName(name) {
result = append(result, s)
}
}
c.Sections = result
return c
}
// RemoveSubsection removes a subsection from a config file.
func (c *Config) RemoveSubsection(section string, subsection string) *Config {
for _, s := range c.Sections {
if s.IsName(section) {
result := Subsections{}
for _, ss := range s.Subsections {
if !ss.IsName(subsection) {
result = append(result, ss)
}
}
s.Subsections = result
}
}
return c
}
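A brief usage sketch of this API (the import path is taken from the other files in this commit; the values are illustrative):

package main

import (
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing/format/config"
)

func main() {
	cfg := config.New().
		SetOption("core", config.NoSubsection, "filemode", "false").
		AddOption("remote", "origin", "url", "https://example.com/repo.git")
	// Section and option lookups are case-insensitive.
	fmt.Println(cfg.Section("core").Option("filemode")) // false
}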

View File

@@ -0,0 +1,37 @@
package config
import (
"io"
"github.com/src-d/gcfg"
)
// A Decoder reads and decodes config files from an input stream.
type Decoder struct {
io.Reader
}
// NewDecoder returns a new decoder that reads from r.
func NewDecoder(r io.Reader) *Decoder {
return &Decoder{r}
}
// Decode reads the whole config from its input and stores it in the
// value pointed to by config.
func (d *Decoder) Decode(config *Config) error {
cb := func(s string, ss string, k string, v string, bv bool) error {
if ss == "" && k == "" {
config.Section(s)
return nil
}
if ss != "" && k == "" {
config.Section(s).Subsection(ss)
return nil
}
config.AddOption(s, ss, k, v)
return nil
}
return gcfg.ReadWithCallback(d, cb)
}
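A minimal decoding sketch (the config text is illustrative):

package main

import (
	"fmt"
	"strings"

	"gopkg.in/src-d/go-git.v4/plumbing/format/config"
)

func main() {
	raw := "[core]\n\tfilemode = false\n"
	cfg := config.New()
	if err := config.NewDecoder(strings.NewReader(raw)).Decode(cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.Section("core").Option("filemode")) // false
}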

View File

@@ -0,0 +1,122 @@
// Package config implements encoding and decoding of git config files.
//
// Configuration File
// ------------------
//
// The Git configuration file contains a number of variables that affect
// the Git commands' behavior. The `.git/config` file in each repository
// is used to store the configuration for that repository, and
// `$HOME/.gitconfig` is used to store a per-user configuration as
// fallback values for the `.git/config` file. The file `/etc/gitconfig`
// can be used to store a system-wide default configuration.
//
// The configuration variables are used by both the Git plumbing
// and the porcelains. The variables are divided into sections, wherein
// the fully qualified variable name of the variable itself is the last
// dot-separated segment and the section name is everything before the last
// dot. The variable names are case-insensitive, allow only alphanumeric
// characters and `-`, and must start with an alphabetic character. Some
// variables may appear multiple times; we say then that the variable is
// multivalued.
//
// Syntax
// ~~~~~~
//
// The syntax is fairly flexible and permissive; whitespaces are mostly
// ignored. The '#' and ';' characters begin comments that extend to the end
// of the line; blank lines are ignored.
//
// The file consists of sections and variables. A section begins with
// the name of the section in square brackets and continues until the next
// section begins. Section names are case-insensitive. Only alphanumeric
// characters, `-` and `.` are allowed in section names. Each variable
// must belong to some section, which means that there must be a section
// header before the first setting of a variable.
//
// Sections can be further divided into subsections. To begin a subsection
// put its name in double quotes, separated by space from the section name,
// in the section header, like in the example below:
//
// --------
// [section "subsection"]
//
// --------
//
// Subsection names are case sensitive and can contain any characters except
// newline (doublequote `"` and backslash can be included by escaping them
// as `\"` and `\\`, respectively). Section headers cannot span multiple
// lines. Variables may belong directly to a section or to a given subsection.
// You can have `[section]` if you have `[section "subsection"]`, but you
// don't need to.
//
// There is also a deprecated `[section.subsection]` syntax. With this
// syntax, the subsection name is converted to lower-case and is also
// compared case sensitively. These subsection names follow the same
// restrictions as section names.
//
// All the other lines (and the remainder of the line after the section
// header) are recognized as setting variables, in the form
// 'name = value' (or just 'name', which is a short-hand to say that
// the variable is the boolean "true").
// The variable names are case-insensitive, allow only alphanumeric characters
// and `-`, and must start with an alphabetic character.
//
// A line that defines a value can be continued to the next line by
// ending it with a `\`; the backslash and the end-of-line are
// stripped. Leading whitespaces after 'name =', the remainder of the
// line after the first comment character '#' or ';', and trailing
// whitespaces of the line are discarded unless they are enclosed in
// double quotes. Internal whitespaces within the value are retained
// verbatim.
//
// Inside double quotes, double quote `"` and backslash `\` characters
// must be escaped: use `\"` for `"` and `\\` for `\`.
//
// The following escape sequences (beside `\"` and `\\`) are recognized:
// `\n` for newline character (NL), `\t` for horizontal tabulation (HT, TAB)
// and `\b` for backspace (BS). Other char escape sequences (including octal
// escape sequences) are invalid.
//
// Includes
// ~~~~~~~~
//
// You can include one config file from another by setting the special
// `include.path` variable to the name of the file to be included. The
// variable takes a pathname as its value, and is subject to tilde
// expansion.
//
// The included file is expanded immediately, as if its contents had been
// found at the location of the include directive. If the value of the
// `include.path` variable is a relative path, the path is considered to be
// relative to the configuration file in which the include directive was
// found. See below for examples.
//
//
// Example
// ~~~~~~~
//
// # Core variables
// [core]
// ; Don't trust file modes
// filemode = false
//
// # Our diff algorithm
// [diff]
// external = /usr/local/bin/diff-wrapper
// renames = true
//
// [branch "devel"]
// remote = origin
// merge = refs/heads/devel
//
// # Proxy settings
// [core]
// gitProxy="ssh" for "kernel.org"
// gitProxy=default-proxy ; for the rest
//
// [include]
// path = /path/to/foo.inc ; include by absolute path
// path = foo ; expand "foo" relative to the current file
// path = ~/foo ; expand "foo" in your `$HOME` directory
//
package config

View File

@@ -0,0 +1,77 @@
package config
import (
"fmt"
"io"
"strings"
)
// An Encoder writes config files to an output stream.
type Encoder struct {
w io.Writer
}
// NewEncoder returns a new encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w}
}
// Encode writes the config in git config format to the stream of the encoder.
func (e *Encoder) Encode(cfg *Config) error {
for _, s := range cfg.Sections {
if err := e.encodeSection(s); err != nil {
return err
}
}
return nil
}
func (e *Encoder) encodeSection(s *Section) error {
if len(s.Options) > 0 {
if err := e.printf("[%s]\n", s.Name); err != nil {
return err
}
if err := e.encodeOptions(s.Options); err != nil {
return err
}
}
for _, ss := range s.Subsections {
if err := e.encodeSubsection(s.Name, ss); err != nil {
return err
}
}
return nil
}
func (e *Encoder) encodeSubsection(sectionName string, s *Subsection) error {
//TODO: escape
if err := e.printf("[%s \"%s\"]\n", sectionName, s.Name); err != nil {
return err
}
return e.encodeOptions(s.Options)
}
func (e *Encoder) encodeOptions(opts Options) error {
for _, o := range opts {
pattern := "\t%s = %s\n"
if strings.Contains(o.Value, "\\") {
pattern = "\t%s = %q\n"
}
if err := e.printf(pattern, o.Key, o.Value); err != nil {
return err
}
}
return nil
}
func (e *Encoder) printf(msg string, args ...interface{}) error {
_, err := fmt.Fprintf(e.w, msg, args...)
return err
}
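A minimal encoding sketch, the inverse of the decoder example above:

package main

import (
	"bytes"
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing/format/config"
)

func main() {
	cfg := config.New()
	cfg.Section("core").SetOption("filemode", "false")
	var buf bytes.Buffer
	if err := config.NewEncoder(&buf).Encode(cfg); err != nil {
		panic(err)
	}
	fmt.Print(buf.String())
	// [core]
	//	filemode = false
}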

View File

@@ -0,0 +1,117 @@
package config
import (
"fmt"
"strings"
)
// Option defines a key/value entity in a config file.
type Option struct {
// Key, with its original case preserved.
// Use IsKey to compare keys regardless of case.
Key string
// Value holds the original value as a string; it may not be normalized.
Value string
}
type Options []*Option
// IsKey returns true if the given key matches
// this option's key in a case-insensitive comparison.
func (o *Option) IsKey(key string) bool {
return strings.ToLower(o.Key) == strings.ToLower(key)
}
func (opts Options) GoString() string {
var strs []string
for _, opt := range opts {
strs = append(strs, fmt.Sprintf("%#v", opt))
}
return strings.Join(strs, ", ")
}
// Get gets the value for the given key if set,
// otherwise it returns the empty string.
//
// Note that there is no difference between an option that is unset and an
// option that is set to the empty string: Get returns "" in both cases.
//
// This matches git behaviour since git v1.8.1-rc1:
// if there are multiple definitions of a key, the
// last one wins.
//
// See: http://article.gmane.org/gmane.linux.kernel/1407184
//
// In order to get all possible values for the same key,
// use GetAll.
func (opts Options) Get(key string) string {
for i := len(opts) - 1; i >= 0; i-- {
o := opts[i]
if o.IsKey(key) {
return o.Value
}
}
return ""
}
// GetAll returns all possible values for the same key.
func (opts Options) GetAll(key string) []string {
result := []string{}
for _, o := range opts {
if o.IsKey(key) {
result = append(result, o.Value)
}
}
return result
}
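A short sketch contrasting the last-one-wins Get with GetAll (the keys and values are illustrative):

package main

import (
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing/format/config"
)

func main() {
	opts := config.Options{
		{Key: "url", Value: "first"},
		{Key: "URL", Value: "second"}, // same key, different case
	}
	fmt.Println(opts.Get("url"))    // second (last definition wins)
	fmt.Println(opts.GetAll("url")) // [first second]
}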
func (opts Options) withoutOption(key string) Options {
result := Options{}
for _, o := range opts {
if !o.IsKey(key) {
result = append(result, o)
}
}
return result
}
func (opts Options) withAddedOption(key string, value string) Options {
return append(opts, &Option{key, value})
}
func (opts Options) withSettedOption(key string, values ...string) Options {
var result Options
var added []string
for _, o := range opts {
if !o.IsKey(key) {
result = append(result, o)
continue
}
if contains(values, o.Value) {
added = append(added, o.Value)
result = append(result, o)
continue
}
}
for _, value := range values {
if contains(added, value) {
continue
}
result = result.withAddedOption(key, value)
}
return result
}
func contains(haystack []string, needle string) bool {
for _, s := range haystack {
if s == needle {
return true
}
}
return false
}

View File

@@ -0,0 +1,146 @@
package config
import (
"fmt"
"strings"
)
// Section is the representation of a section inside git configuration files.
// Each Section contains Options that are used by both the Git plumbing
// and the porcelains.
// Sections can be further divided into subsections. To begin a subsection
// put its name in double quotes, separated by space from the section name,
// in the section header, like in the example below:
//
// [section "subsection"]
//
// All the other lines (and the remainder of the line after the section header)
// are recognized as option variables, in the form "name = value" (or just name,
// which is a short-hand to say that the variable is the boolean "true").
// The variable names are case-insensitive, allow only alphanumeric characters
// and -, and must start with an alphabetic character:
//
// [section "subsection1"]
// option1 = value1
// option2
// [section "subsection2"]
// option3 = value2
//
type Section struct {
Name string
Options Options
Subsections Subsections
}
type Subsection struct {
Name string
Options Options
}
type Sections []*Section
func (s Sections) GoString() string {
var strs []string
for _, ss := range s {
strs = append(strs, fmt.Sprintf("%#v", ss))
}
return strings.Join(strs, ", ")
}
type Subsections []*Subsection
func (s Subsections) GoString() string {
var strs []string
for _, ss := range s {
strs = append(strs, fmt.Sprintf("%#v", ss))
}
return strings.Join(strs, ", ")
}
// IsName checks if the given name equals the Section name; the comparison is case-insensitive.
func (s *Section) IsName(name string) bool {
return strings.ToLower(s.Name) == strings.ToLower(name)
}
// Option returns the value for the specified key. An empty string is
// returned if the key does not exist.
func (s *Section) Option(key string) string {
return s.Options.Get(key)
}
// AddOption adds a new Option to the Section. The updated Section is returned.
func (s *Section) AddOption(key string, value string) *Section {
s.Options = s.Options.withAddedOption(key, value)
return s
}
// SetOption adds a new Option to the Section. If the option already exists, it is replaced.
// The updated Section is returned.
func (s *Section) SetOption(key string, value string) *Section {
s.Options = s.Options.withSettedOption(key, value)
return s
}
// RemoveOption removes the option with the specified key. The updated Section is returned.
func (s *Section) RemoveOption(key string) *Section {
s.Options = s.Options.withoutOption(key)
return s
}
// Subsection returns a Subsection from the specified Section. If the
// Subsection does not exist, a new one is created and added to the Section.
func (s *Section) Subsection(name string) *Subsection {
for i := len(s.Subsections) - 1; i >= 0; i-- {
ss := s.Subsections[i]
if ss.IsName(name) {
return ss
}
}
ss := &Subsection{Name: name}
s.Subsections = append(s.Subsections, ss)
return ss
}
// HasSubsection checks if the Section has a Subsection with the specified name.
func (s *Section) HasSubsection(name string) bool {
for _, ss := range s.Subsections {
if ss.IsName(name) {
return true
}
}
return false
}
// IsName checks if the name of the subsection is exactly the specified name.
func (s *Subsection) IsName(name string) bool {
return s.Name == name
}
// Option returns the value of the option with the specified key. If the
// option does not exist, an empty string is returned.
func (s *Subsection) Option(key string) string {
return s.Options.Get(key)
}
// AddOption adds a new Option to the Subsection. The updated Subsection is returned.
func (s *Subsection) AddOption(key string, value string) *Subsection {
s.Options = s.Options.withAddedOption(key, value)
return s
}
// SetOption adds a new Option to the Subsection. If the option already exists, it is replaced.
// The updated Subsection is returned.
func (s *Subsection) SetOption(key string, value ...string) *Subsection {
s.Options = s.Options.withSettedOption(key, value...)
return s
}
// RemoveOption removes the option with the specified key. The updated Subsection is returned.
func (s *Subsection) RemoveOption(key string) *Subsection {
s.Options = s.Options.withoutOption(key)
return s
}
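A short sketch of the Section/Subsection API (the names are illustrative):

package main

import (
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing/format/config"
)

func main() {
	s := &config.Section{Name: "branch"}
	s.Subsection("devel").SetOption("remote", "origin")
	fmt.Println(s.HasSubsection("devel"))               // true
	fmt.Println(s.Subsection("devel").Option("remote")) // origin
}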

View File

@@ -0,0 +1,58 @@
package diff
import (
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
)
// Operation defines the operation of a diff item.
type Operation int
const (
// Equal item represents an equal diff.
Equal Operation = iota
// Add item represents an insert diff.
Add
// Delete item represents a delete diff.
Delete
)
// Patch represents a collection of steps to transform several files.
type Patch interface {
// FilePatches returns a slice of patches per file.
FilePatches() []FilePatch
// Message returns an optional message that can be at the top of the
// Patch representation.
Message() string
}
// FilePatch represents the necessary steps to transform one file to another.
type FilePatch interface {
// IsBinary returns true if this patch is representing a binary file.
IsBinary() bool
// Files returns the from and to Files, with all the necessary metadata
// about them. If the patch creates a new file, "from" will be nil.
// If the patch deletes a file, "to" will be nil.
Files() (from, to File)
// Chunks returns a slice of ordered changes to transform "from" File to
// "to" File. If the file is a binary one, Chunks will be empty.
Chunks() []Chunk
}
// File contains all the file metadata necessary to print some patch formats.
type File interface {
// Hash returns the File Hash.
Hash() plumbing.Hash
// Mode returns the FileMode.
Mode() filemode.FileMode
// Path returns the complete Path to the file, including the filename.
Path() string
}
// Chunk represents a portion of a file transformation to another.
type Chunk interface {
// Content contains the portion of the file.
Content() string
// Type contains the Operation to do with this Chunk.
Type() Operation
}
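A minimal illustrative implementation of the Chunk interface (textChunk is an invented name; a full Patch would also need File and FilePatch implementations):

package example

import "gopkg.in/src-d/go-git.v4/plumbing/format/diff"

// textChunk is a hypothetical value type satisfying diff.Chunk.
type textChunk struct {
	content string
	op      diff.Operation
}

func (c textChunk) Content() string      { return c.content }
func (c textChunk) Type() diff.Operation { return c.op }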

View File

@@ -0,0 +1,360 @@
package diff
import (
"bytes"
"fmt"
"io"
"strings"
"gopkg.in/src-d/go-git.v4/plumbing"
)
const (
diffInit = "diff --git a/%s b/%s\n"
chunkStart = "@@ -"
chunkMiddle = " +"
chunkEnd = " @@%s\n"
chunkCount = "%d,%d"
noFilePath = "/dev/null"
aDir = "a/"
bDir = "b/"
fPath = "--- %s\n"
tPath = "+++ %s\n"
binary = "Binary files %s and %s differ\n"
addLine = "+%s\n"
deleteLine = "-%s\n"
equalLine = " %s\n"
oldMode = "old mode %o\n"
newMode = "new mode %o\n"
deletedFileMode = "deleted file mode %o\n"
newFileMode = "new file mode %o\n"
renameFrom = "from"
renameTo = "to"
renameFileMode = "rename %s %s\n"
indexAndMode = "index %s..%s %o\n"
indexNoMode = "index %s..%s\n"
DefaultContextLines = 3
)
// UnifiedEncoder encodes a unified diff into the provided Writer.
// There are some unsupported features:
// - Similarity index for renames
// - Short hash representation
type UnifiedEncoder struct {
io.Writer
// ctxLines is the count of unchanged lines that will appear
// surrounding a change.
ctxLines int
buf bytes.Buffer
}
func NewUnifiedEncoder(w io.Writer, ctxLines int) *UnifiedEncoder {
return &UnifiedEncoder{ctxLines: ctxLines, Writer: w}
}
func (e *UnifiedEncoder) Encode(patch Patch) error {
e.printMessage(patch.Message())
if err := e.encodeFilePatch(patch.FilePatches()); err != nil {
return err
}
_, err := e.buf.WriteTo(e)
return err
}
func (e *UnifiedEncoder) encodeFilePatch(filePatches []FilePatch) error {
for _, p := range filePatches {
f, t := p.Files()
if err := e.header(f, t, p.IsBinary()); err != nil {
return err
}
g := newHunksGenerator(p.Chunks(), e.ctxLines)
for _, c := range g.Generate() {
c.WriteTo(&e.buf)
}
}
return nil
}
func (e *UnifiedEncoder) printMessage(message string) {
isEmpty := message == ""
hasSuffix := strings.HasSuffix(message, "\n")
if !isEmpty && !hasSuffix {
message = message + "\n"
}
e.buf.WriteString(message)
}
func (e *UnifiedEncoder) header(from, to File, isBinary bool) error {
switch {
case from == nil && to == nil:
return nil
case from != nil && to != nil:
hashEquals := from.Hash() == to.Hash()
fmt.Fprintf(&e.buf, diffInit, from.Path(), to.Path())
if from.Mode() != to.Mode() {
fmt.Fprintf(&e.buf, oldMode+newMode, from.Mode(), to.Mode())
}
if from.Path() != to.Path() {
fmt.Fprintf(&e.buf,
renameFileMode+renameFileMode,
renameFrom, from.Path(), renameTo, to.Path())
}
if from.Mode() != to.Mode() && !hashEquals {
fmt.Fprintf(&e.buf, indexNoMode, from.Hash(), to.Hash())
} else if !hashEquals {
fmt.Fprintf(&e.buf, indexAndMode, from.Hash(), to.Hash(), from.Mode())
}
if !hashEquals {
e.pathLines(isBinary, aDir+from.Path(), bDir+to.Path())
}
case from == nil:
fmt.Fprintf(&e.buf, diffInit, to.Path(), to.Path())
fmt.Fprintf(&e.buf, newFileMode, to.Mode())
fmt.Fprintf(&e.buf, indexNoMode, plumbing.ZeroHash, to.Hash())
e.pathLines(isBinary, noFilePath, bDir+to.Path())
case to == nil:
fmt.Fprintf(&e.buf, diffInit, from.Path(), from.Path())
fmt.Fprintf(&e.buf, deletedFileMode, from.Mode())
fmt.Fprintf(&e.buf, indexNoMode, from.Hash(), plumbing.ZeroHash)
e.pathLines(isBinary, aDir+from.Path(), noFilePath)
}
return nil
}
func (e *UnifiedEncoder) pathLines(isBinary bool, fromPath, toPath string) {
format := fPath + tPath
if isBinary {
format = binary
}
fmt.Fprintf(&e.buf, format, fromPath, toPath)
}
type hunksGenerator struct {
fromLine, toLine int
ctxLines int
chunks []Chunk
current *hunk
hunks []*hunk
beforeContext, afterContext []string
}
func newHunksGenerator(chunks []Chunk, ctxLines int) *hunksGenerator {
return &hunksGenerator{
chunks: chunks,
ctxLines: ctxLines,
}
}
func (c *hunksGenerator) Generate() []*hunk {
for i, chunk := range c.chunks {
ls := splitLines(chunk.Content())
lsLen := len(ls)
switch chunk.Type() {
case Equal:
c.fromLine += lsLen
c.toLine += lsLen
c.processEqualsLines(ls, i)
case Delete:
if lsLen != 0 {
c.fromLine++
}
c.processHunk(i, chunk.Type())
c.fromLine += lsLen - 1
c.current.AddOp(chunk.Type(), ls...)
case Add:
if lsLen != 0 {
c.toLine++
}
c.processHunk(i, chunk.Type())
c.toLine += lsLen - 1
c.current.AddOp(chunk.Type(), ls...)
}
if i == len(c.chunks)-1 && c.current != nil {
c.hunks = append(c.hunks, c.current)
}
}
return c.hunks
}
func (c *hunksGenerator) processHunk(i int, op Operation) {
if c.current != nil {
return
}
var ctxPrefix string
linesBefore := len(c.beforeContext)
if linesBefore > c.ctxLines {
ctxPrefix = " " + c.beforeContext[linesBefore-c.ctxLines-1]
c.beforeContext = c.beforeContext[linesBefore-c.ctxLines:]
linesBefore = c.ctxLines
}
c.current = &hunk{ctxPrefix: ctxPrefix}
c.current.AddOp(Equal, c.beforeContext...)
switch op {
case Delete:
c.current.fromLine, c.current.toLine =
c.addLineNumbers(c.fromLine, c.toLine, linesBefore, i, Add)
case Add:
c.current.toLine, c.current.fromLine =
c.addLineNumbers(c.toLine, c.fromLine, linesBefore, i, Delete)
}
c.beforeContext = nil
}
// addLineNumbers obtains the line numbers in a new chunk
func (c *hunksGenerator) addLineNumbers(la, lb int, linesBefore int, i int, op Operation) (cla, clb int) {
cla = la - linesBefore
// we need to search for a reference for the next diff
switch {
case linesBefore != 0 && c.ctxLines != 0:
if lb > c.ctxLines {
clb = lb - c.ctxLines + 1
} else {
clb = 1
}
case c.ctxLines == 0:
clb = lb
case i != len(c.chunks)-1:
next := c.chunks[i+1]
if next.Type() == op || next.Type() == Equal {
// this diff will be into this chunk
clb = lb + 1
}
}
return
}
func (c *hunksGenerator) processEqualsLines(ls []string, i int) {
if c.current == nil {
c.beforeContext = append(c.beforeContext, ls...)
return
}
c.afterContext = append(c.afterContext, ls...)
if len(c.afterContext) <= c.ctxLines*2 && i != len(c.chunks)-1 {
c.current.AddOp(Equal, c.afterContext...)
c.afterContext = nil
} else {
ctxLines := c.ctxLines
if ctxLines > len(c.afterContext) {
ctxLines = len(c.afterContext)
}
c.current.AddOp(Equal, c.afterContext[:ctxLines]...)
c.hunks = append(c.hunks, c.current)
c.current = nil
c.beforeContext = c.afterContext[ctxLines:]
c.afterContext = nil
}
}
func splitLines(s string) []string {
out := strings.Split(s, "\n")
if out[len(out)-1] == "" {
out = out[:len(out)-1]
}
return out
}
type hunk struct {
fromLine int
toLine int
fromCount int
toCount int
ctxPrefix string
ops []*op
}
func (c *hunk) WriteTo(buf *bytes.Buffer) {
buf.WriteString(chunkStart)
if c.fromCount == 1 {
fmt.Fprintf(buf, "%d", c.fromLine)
} else {
fmt.Fprintf(buf, chunkCount, c.fromLine, c.fromCount)
}
buf.WriteString(chunkMiddle)
if c.toCount == 1 {
fmt.Fprintf(buf, "%d", c.toLine)
} else {
fmt.Fprintf(buf, chunkCount, c.toLine, c.toCount)
}
fmt.Fprintf(buf, chunkEnd, c.ctxPrefix)
for _, d := range c.ops {
buf.WriteString(d.String())
}
}
func (c *hunk) AddOp(t Operation, s ...string) {
ls := len(s)
switch t {
case Add:
c.toCount += ls
case Delete:
c.fromCount += ls
case Equal:
c.toCount += ls
c.fromCount += ls
}
for _, l := range s {
c.ops = append(c.ops, &op{l, t})
}
}
type op struct {
text string
t Operation
}
func (o *op) String() string {
var prefix string
switch o.t {
case Add:
prefix = addLine
case Delete:
prefix = deleteLine
case Equal:
prefix = equalLine
}
return fmt.Sprintf(prefix, o.text)
}
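A usage sketch, assuming patch is any value implementing the Patch interface from the previous file (printPatch is an illustrative helper):

package example

import (
	"os"

	"gopkg.in/src-d/go-git.v4/plumbing/format/diff"
)

// printPatch writes patch to stdout in unified format with the
// default three lines of context.
func printPatch(patch diff.Patch) error {
	e := diff.NewUnifiedEncoder(os.Stdout, diff.DefaultContextLines)
	return e.Encode(patch)
}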

View File

@@ -0,0 +1,136 @@
package gitignore
import (
"bytes"
"io/ioutil"
"os"
"os/user"
"strings"
"gopkg.in/src-d/go-billy.v4"
"gopkg.in/src-d/go-git.v4/plumbing/format/config"
gioutil "gopkg.in/src-d/go-git.v4/utils/ioutil"
)
const (
commentPrefix = "#"
coreSection = "core"
eol = "\n"
excludesfile = "excludesfile"
gitDir = ".git"
gitignoreFile = ".gitignore"
gitconfigFile = ".gitconfig"
systemFile = "/etc/gitconfig"
)
// readIgnoreFile reads a specific git ignore file.
func readIgnoreFile(fs billy.Filesystem, path []string, ignoreFile string) (ps []Pattern, err error) {
f, err := fs.Open(fs.Join(append(path, ignoreFile)...))
if err == nil {
defer f.Close()
if data, err := ioutil.ReadAll(f); err == nil {
for _, s := range strings.Split(string(data), eol) {
if !strings.HasPrefix(s, commentPrefix) && len(strings.TrimSpace(s)) > 0 {
ps = append(ps, ParsePattern(s, path))
}
}
}
} else if !os.IsNotExist(err) {
return nil, err
}
return
}
// ReadPatterns reads gitignore patterns recursively, traversing the directory
// structure. The result is in ascending order of priority (the last pattern
// has the highest priority).
func ReadPatterns(fs billy.Filesystem, path []string) (ps []Pattern, err error) {
ps, _ = readIgnoreFile(fs, path, gitignoreFile)
var fis []os.FileInfo
fis, err = fs.ReadDir(fs.Join(path...))
if err != nil {
return
}
for _, fi := range fis {
if fi.IsDir() && fi.Name() != gitDir {
var subps []Pattern
subps, err = ReadPatterns(fs, append(path, fi.Name()))
if err != nil {
return
}
if len(subps) > 0 {
ps = append(ps, subps...)
}
}
}
return
}
func loadPatterns(fs billy.Filesystem, path string) (ps []Pattern, err error) {
f, err := fs.Open(path)
if err != nil {
if os.IsNotExist(err) {
return nil, nil
}
return nil, err
}
defer gioutil.CheckClose(f, &err)
b, err := ioutil.ReadAll(f)
if err != nil {
return
}
d := config.NewDecoder(bytes.NewBuffer(b))
raw := config.New()
if err = d.Decode(raw); err != nil {
return
}
s := raw.Section(coreSection)
efo := s.Options.Get(excludesfile)
if efo == "" {
return nil, nil
}
ps, err = readIgnoreFile(fs, nil, efo)
if os.IsNotExist(err) {
return nil, nil
}
return
}
// LoadGlobalPatterns loads gitignore patterns from the gitignore file
// declared in a user's ~/.gitconfig file. If the ~/.gitconfig file does not
// exist the function will return nil. If the core.excludesfile property
// is not declared, the function will return nil. If the file pointed to by
// the core.excludesfile property does not exist, the function will return nil.
//
// The function assumes fs is rooted at the root filesystem.
func LoadGlobalPatterns(fs billy.Filesystem) (ps []Pattern, err error) {
usr, err := user.Current()
if err != nil {
return
}
return loadPatterns(fs, fs.Join(usr.HomeDir, gitconfigFile))
}
// LoadSystemPatterns loads gitignore patterns from the gitignore file
// declared in the system's /etc/gitconfig file. If the /etc/gitconfig file does
// not exist the function will return nil. If the core.excludesfile property
// is not declared, the function will return nil. If the file pointed to by
// the core.excludesfile property does not exist, the function will return nil.
//
// The function assumes fs is rooted at the root filesystem.
func LoadSystemPatterns(fs billy.Filesystem) (ps []Pattern, err error) {
return loadPatterns(fs, systemFile)
}
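A sketch of wiring these loaders to a matcher for a work tree on disk (repoMatcher and the root parameter are illustrative; NewMatcher is defined in a later file of this commit):

package example

import (
	"gopkg.in/src-d/go-billy.v4/osfs"
	"gopkg.in/src-d/go-git.v4/plumbing/format/gitignore"
)

// repoMatcher reads every .gitignore under root and builds a matcher.
func repoMatcher(root string) (gitignore.Matcher, error) {
	fs := osfs.New(root) // billy filesystem rooted at the work tree
	ps, err := gitignore.ReadPatterns(fs, nil)
	if err != nil {
		return nil, err
	}
	return gitignore.NewMatcher(ps), nil
}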

View File

@@ -0,0 +1,70 @@
// Package gitignore implements matching file system paths to gitignore patterns that
// can be automatically read from a git repository tree in the order of definition
// priorities. It supports all pattern formats as specified in the original gitignore
// documentation, copied below:
//
// Pattern format
// ==============
//
// - A blank line matches no files, so it can serve as a separator for readability.
//
// - A line starting with # serves as a comment. Put a backslash ("\") in front of
// the first hash for patterns that begin with a hash.
//
// - Trailing spaces are ignored unless they are quoted with backslash ("\").
//
// - An optional prefix "!" which negates the pattern; any matching file excluded
// by a previous pattern will become included again. It is not possible to
// re-include a file if a parent directory of that file is excluded.
// Git doesn't list excluded directories for performance reasons, so
// any patterns on contained files have no effect, no matter where they are
// defined. Put a backslash ("\") in front of the first "!" for patterns
// that begin with a literal "!", for example, "\!important!.txt".
//
// - If the pattern ends with a slash, it is removed for the purpose of the
// following description, but it would only find a match with a directory.
// In other words, foo/ will match a directory foo and paths underneath it,
// but will not match a regular file or a symbolic link foo (this is consistent
// with the way how pathspec works in general in Git).
//
// - If the pattern does not contain a slash /, Git treats it as a shell glob
// pattern and checks for a match against the pathname relative to the location
// of the .gitignore file (relative to the toplevel of the work tree if not
// from a .gitignore file).
//
// - Otherwise, Git treats the pattern as a shell glob suitable for consumption
// by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will
// not match a / in the pathname. For example, "Documentation/*.html" matches
// "Documentation/git.html" but not "Documentation/ppc/ppc.html" or
// "tools/perf/Documentation/perf.html".
//
// - A leading slash matches the beginning of the pathname. For example,
// "/*.c" matches "cat-file.c" but not "mozilla-sha1/sha1.c".
//
// Two consecutive asterisks ("**") in patterns matched against full pathname
// may have special meaning:
//
// - A leading "**" followed by a slash means match in all directories.
// For example, "**/foo" matches file or directory "foo" anywhere, the same as
// pattern "foo". "**/foo/bar" matches file or directory "bar"
// anywhere that is directly under directory "foo".
//
// - A trailing "/**" matches everything inside. For example, "abc/**" matches
// all files inside directory "abc", relative to the location of the
// .gitignore file, with infinite depth.
//
// - A slash followed by two consecutive asterisks then a slash matches
// zero or more directories. For example, "a/**/b" matches "a/b", "a/x/b",
// "a/x/y/b" and so on.
//
// - Other consecutive asterisks are considered invalid.
//
// Copyright and license
// =====================
//
// Copyright (c) Oleg Sklyar, Silvertern and source{d}
//
// The package code was donated to source{d} to include, modify and develop
// further as a part of the `go-git` project, release it on the license of
// the whole project or delete it from the project.
package gitignore

View File

@@ -0,0 +1,30 @@
package gitignore
// Matcher defines a global multi-pattern matcher for gitignore patterns
type Matcher interface {
// Match matches patterns in the order of priorities. As soon as an inclusion or
// exclusion is found, no further matching is performed.
Match(path []string, isDir bool) bool
}
// NewMatcher constructs a new global matcher. Patterns must be given in the order of
// increasing priority. That is: the most generic settings files first, then the
// contents of the repo's root .gitignore, then the contents of .gitignore files
// further down the path of the repo, and finally the patterns given on the
// command line.
func NewMatcher(ps []Pattern) Matcher {
return &matcher{ps}
}
type matcher struct {
patterns []Pattern
}
func (m *matcher) Match(path []string, isDir bool) bool {
n := len(m.patterns)
for i := n - 1; i >= 0; i-- {
if match := m.patterns[i].Match(path, isDir); match > NoMatch {
return match == Exclude
}
}
return false
}

View File

@@ -0,0 +1,153 @@
package gitignore
import (
"path/filepath"
"strings"
)
// MatchResult defines the outcome of a match: no match, exclusion or inclusion.
type MatchResult int
const (
// NoMatch defines the no match outcome of a match check
NoMatch MatchResult = iota
// Exclude defines an exclusion of a file as a result of a match check
Exclude
// Include defines an explicit inclusion of a file as a result of a match check
Include
)
const (
inclusionPrefix = "!"
zeroToManyDirs = "**"
patternDirSep = "/"
)
// Pattern defines a single gitignore pattern.
type Pattern interface {
// Match matches the given path to the pattern.
Match(path []string, isDir bool) MatchResult
}
type pattern struct {
domain []string
pattern []string
inclusion bool
dirOnly bool
isGlob bool
}
// ParsePattern parses a gitignore pattern string into the Pattern structure.
func ParsePattern(p string, domain []string) Pattern {
res := pattern{domain: domain}
if strings.HasPrefix(p, inclusionPrefix) {
res.inclusion = true
p = p[1:]
}
if !strings.HasSuffix(p, "\\ ") {
p = strings.TrimRight(p, " ")
}
if strings.HasSuffix(p, patternDirSep) {
res.dirOnly = true
p = p[:len(p)-1]
}
if strings.Contains(p, patternDirSep) {
res.isGlob = true
}
res.pattern = strings.Split(p, patternDirSep)
return &res
}
func (p *pattern) Match(path []string, isDir bool) MatchResult {
if len(path) <= len(p.domain) {
return NoMatch
}
for i, e := range p.domain {
if path[i] != e {
return NoMatch
}
}
path = path[len(p.domain):]
if p.isGlob && !p.globMatch(path, isDir) {
return NoMatch
} else if !p.isGlob && !p.simpleNameMatch(path, isDir) {
return NoMatch
}
if p.inclusion {
return Include
} else {
return Exclude
}
}
func (p *pattern) simpleNameMatch(path []string, isDir bool) bool {
for i, name := range path {
if match, err := filepath.Match(p.pattern[0], name); err != nil {
return false
} else if !match {
continue
}
if p.dirOnly && !isDir && i == len(path)-1 {
return false
}
return true
}
return false
}
func (p *pattern) globMatch(path []string, isDir bool) bool {
matched := false
canTraverse := false
for i, pattern := range p.pattern {
if pattern == "" {
canTraverse = false
continue
}
if pattern == zeroToManyDirs {
if i == len(p.pattern)-1 {
break
}
canTraverse = true
continue
}
if strings.Contains(pattern, zeroToManyDirs) {
return false
}
if len(path) == 0 {
return false
}
if canTraverse {
canTraverse = false
for len(path) > 0 {
e := path[0]
path = path[1:]
if match, err := filepath.Match(pattern, e); err != nil {
return false
} else if match {
matched = true
break
} else if len(path) == 0 {
// if nothing left then fail
matched = false
}
}
} else {
if match, err := filepath.Match(pattern, path[0]); err != nil || !match {
return false
}
matched = true
path = path[1:]
}
}
if matched && p.dirOnly && !isDir && len(path) == 0 {
matched = false
}
return matched
}
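A small sketch of priority and negation in practice (the file names are illustrative):

package main

import (
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing/format/gitignore"
)

func main() {
	ps := []gitignore.Pattern{
		gitignore.ParsePattern("*.log", nil),
		gitignore.ParsePattern("!important.log", nil), // later = higher priority
	}
	m := gitignore.NewMatcher(ps)
	fmt.Println(m.Match([]string{"debug.log"}, false))     // true: excluded
	fmt.Println(m.Match([]string{"important.log"}, false)) // false: re-included
}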

View File

@@ -0,0 +1,181 @@
package idxfile
import (
"bufio"
"bytes"
"errors"
"io"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
var (
// ErrUnsupportedVersion is returned by Decode when the idx file version
// is not supported.
ErrUnsupportedVersion = errors.New("Unsupported version")
// ErrMalformedIdxFile is returned by Decode when the idx file is corrupted.
ErrMalformedIdxFile = errors.New("Malformed IDX file")
)
const (
fanout = 256
objectIDLength = 20
)
// Decoder reads and decodes idx files from an input stream.
type Decoder struct {
*bufio.Reader
}
// NewDecoder builds a new idx stream decoder that reads from r.
func NewDecoder(r io.Reader) *Decoder {
return &Decoder{bufio.NewReader(r)}
}
// Decode reads from the stream and decodes the content into the MemoryIndex struct.
func (d *Decoder) Decode(idx *MemoryIndex) error {
if err := validateHeader(d); err != nil {
return err
}
flow := []func(*MemoryIndex, io.Reader) error{
readVersion,
readFanout,
readObjectNames,
readCRC32,
readOffsets,
readChecksums,
}
for _, f := range flow {
if err := f(idx, d); err != nil {
return err
}
}
return nil
}
func validateHeader(r io.Reader) error {
var h = make([]byte, 4)
if _, err := io.ReadFull(r, h); err != nil {
return err
}
if !bytes.Equal(h, idxHeader) {
return ErrMalformedIdxFile
}
return nil
}
func readVersion(idx *MemoryIndex, r io.Reader) error {
v, err := binary.ReadUint32(r)
if err != nil {
return err
}
if v > VersionSupported {
return ErrUnsupportedVersion
}
idx.Version = v
return nil
}
func readFanout(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
n, err := binary.ReadUint32(r)
if err != nil {
return err
}
idx.Fanout[k] = n
idx.FanoutMapping[k] = noMapping
}
return nil
}
func readObjectNames(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
var buckets uint32
if k == 0 {
buckets = idx.Fanout[k]
} else {
// the fan-out table is cumulative, so it can never decrease; buckets
// is unsigned, so validate before subtracting rather than testing "< 0"
if idx.Fanout[k] < idx.Fanout[k-1] {
return ErrMalformedIdxFile
}
buckets = idx.Fanout[k] - idx.Fanout[k-1]
}
if buckets == 0 {
continue
}
idx.FanoutMapping[k] = len(idx.Names)
nameLen := int(buckets * objectIDLength)
bin := make([]byte, nameLen)
if _, err := io.ReadFull(r, bin); err != nil {
return err
}
idx.Names = append(idx.Names, bin)
idx.Offset32 = append(idx.Offset32, make([]byte, buckets*4))
idx.CRC32 = append(idx.CRC32, make([]byte, buckets*4))
}
return nil
}
func readCRC32(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
if pos := idx.FanoutMapping[k]; pos != noMapping {
if _, err := io.ReadFull(r, idx.CRC32[pos]); err != nil {
return err
}
}
}
return nil
}
func readOffsets(idx *MemoryIndex, r io.Reader) error {
var o64cnt int
for k := 0; k < fanout; k++ {
if pos := idx.FanoutMapping[k]; pos != noMapping {
if _, err := io.ReadFull(r, idx.Offset32[pos]); err != nil {
return err
}
for p := 0; p < len(idx.Offset32[pos]); p += 4 {
if idx.Offset32[pos][p]&(byte(1)<<7) > 0 {
o64cnt++
}
}
}
}
if o64cnt > 0 {
idx.Offset64 = make([]byte, o64cnt*8)
if _, err := io.ReadFull(r, idx.Offset64); err != nil {
return err
}
}
return nil
}
func readChecksums(idx *MemoryIndex, r io.Reader) error {
if _, err := io.ReadFull(r, idx.PackfileChecksum[:]); err != nil {
return err
}
if _, err := io.ReadFull(r, idx.IdxChecksum[:]); err != nil {
return err
}
return nil
}
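A decoding sketch (the idx file name is hypothetical; NewMemoryIndex is defined in a later file of this commit):

package main

import (
	"fmt"
	"os"

	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)

func main() {
	f, err := os.Open("pack-example.idx") // hypothetical file name
	if err != nil {
		panic(err)
	}
	defer f.Close()
	idx := idxfile.NewMemoryIndex()
	if err := idxfile.NewDecoder(f).Decode(idx); err != nil {
		panic(err)
	}
	n, _ := idx.Count()
	fmt.Println("objects in pack:", n)
}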

View File

@@ -0,0 +1,128 @@
// Package idxfile implements encoding and decoding of packfile idx files.
//
// == Original (version 1) pack-*.idx files have the following format:
//
// - The header consists of 256 4-byte network byte order
// integers. N-th entry of this table records the number of
// objects in the corresponding pack, the first byte of whose
// object name is less than or equal to N. This is called the
// 'first-level fan-out' table.
//
// - The header is followed by sorted 24-byte entries, one entry
// per object in the pack. Each entry is:
//
// 4-byte network byte order integer, recording where the
// object is stored in the packfile as the offset from the
// beginning.
//
// 20-byte object name.
//
// - The file is concluded with a trailer:
//
// A copy of the 20-byte SHA1 checksum at the end of
// corresponding packfile.
//
// 20-byte SHA1-checksum of all of the above.
//
// Pack Idx file:
//
// -- +--------------------------------+
// fanout | fanout[0] = 2 (for example) |-.
// table +--------------------------------+ |
// | fanout[1] | |
// +--------------------------------+ |
// | fanout[2] | |
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
// | fanout[255] = total objects |---.
// -- +--------------------------------+ | |
// main | offset | | |
// index | object name 00XXXXXXXXXXXXXXXX | | |
// tab +--------------------------------+ | |
// | offset | | |
// | object name 00XXXXXXXXXXXXXXXX | | |
// +--------------------------------+<+ |
// .-| offset | |
// | | object name 01XXXXXXXXXXXXXXXX | |
// | +--------------------------------+ |
// | | offset | |
// | | object name 01XXXXXXXXXXXXXXXX | |
// | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
// | | offset | |
// | | object name FFXXXXXXXXXXXXXXXX | |
// --| +--------------------------------+<--+
// trailer | | packfile checksum |
// | +--------------------------------+
// | | idxfile checksum |
// | +--------------------------------+
// .---------.
// |
// Pack file entry: <+
//
// packed object header:
// 1-byte size extension bit (MSB)
// type (next 3 bit)
// size0 (lower 4-bit)
// n-byte sizeN (as long as MSB is set, each 7-bit)
// size0..sizeN form 4+7+7+..+7 bit integer, size0
// is the least significant part, and sizeN is the
// most significant part.
// packed object data:
// If it is not DELTA, then deflated bytes (the size above
// is the size before compression).
// If it is REF_DELTA, then
// 20-byte base object name SHA1 (the size above is the
// size of the delta data that follows).
// delta data, deflated.
// If it is OFS_DELTA, then
// n-byte offset (see below) interpreted as a negative
// offset from the type-byte of the header of the
// ofs-delta entry (the size above is the size of
// the delta data that follows).
// delta data, deflated.
//
// offset encoding:
// n bytes with MSB set in all but the last one.
// The offset is then the number constructed by
// concatenating the lower 7 bit of each byte, and
// for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1))
// to the result.
//
// == Version 2 pack-*.idx files support packs larger than 4 GiB, and
// have some other reorganizations. They have the format:
//
// - A 4-byte magic number '\377tOc' which is an unreasonable
// fanout[0] value.
//
// - A 4-byte version number (= 2)
//
// - A 256-entry fan-out table just like v1.
//
// - A table of sorted 20-byte SHA1 object names. These are
// packed together without offset values to reduce the cache
// footprint of the binary search for a specific object name.
//
// - A table of 4-byte CRC32 values of the packed object data.
// This is new in v2 so compressed data can be copied directly
// from pack to pack during repacking without undetected
// data corruption.
//
// - A table of 4-byte offset values (in network byte order).
// These are usually 31-bit pack file offsets, but large
// offsets are encoded as an index into the next table with
// the msbit set.
//
// - A table of 8-byte offset entries (empty for pack files less
// than 2 GiB). Pack files are organized with heavily used
// objects toward the front, so most object references should
// not need to refer to this table.
//
// - The same trailer as a v1 pack file:
//
// A copy of the 20-byte SHA1 checksum at the end of
// corresponding packfile.
//
// 20-byte SHA1-checksum of all of the above.
//
// Source:
// https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-format.txt
package idxfile
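A small sketch of what the cumulative fan-out table encodes (bucketSize is an invented helper, not part of the package):

package example

// bucketSize returns how many objects have b as the first byte of their
// name, derived from the cumulative fan-out table described above.
func bucketSize(fanout [256]uint32, b byte) uint32 {
	if b == 0 {
		return fanout[0]
	}
	return fanout[b] - fanout[b-1]
}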

View File

@@ -0,0 +1,142 @@
package idxfile
import (
"crypto/sha1"
"hash"
"io"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
// Encoder writes MemoryIndex structs to an output stream.
type Encoder struct {
io.Writer
hash hash.Hash
}
// NewEncoder returns a new stream encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
h := sha1.New()
mw := io.MultiWriter(w, h)
return &Encoder{mw, h}
}
// Encode encodes a MemoryIndex to the encoder's writer.
func (e *Encoder) Encode(idx *MemoryIndex) (int, error) {
flow := []func(*MemoryIndex) (int, error){
e.encodeHeader,
e.encodeFanout,
e.encodeHashes,
e.encodeCRC32,
e.encodeOffsets,
e.encodeChecksums,
}
sz := 0
for _, f := range flow {
i, err := f(idx)
sz += i
if err != nil {
return sz, err
}
}
return sz, nil
}
func (e *Encoder) encodeHeader(idx *MemoryIndex) (int, error) {
c, err := e.Write(idxHeader)
if err != nil {
return c, err
}
return c + 4, binary.WriteUint32(e, idx.Version)
}
func (e *Encoder) encodeFanout(idx *MemoryIndex) (int, error) {
for _, c := range idx.Fanout {
if err := binary.WriteUint32(e, c); err != nil {
return 0, err
}
}
return fanout * 4, nil
}
func (e *Encoder) encodeHashes(idx *MemoryIndex) (int, error) {
var size int
for k := 0; k < fanout; k++ {
pos := idx.FanoutMapping[k]
if pos == noMapping {
continue
}
n, err := e.Write(idx.Names[pos])
if err != nil {
return size, err
}
size += n
}
return size, nil
}
func (e *Encoder) encodeCRC32(idx *MemoryIndex) (int, error) {
var size int
for k := 0; k < fanout; k++ {
pos := idx.FanoutMapping[k]
if pos == noMapping {
continue
}
n, err := e.Write(idx.CRC32[pos])
if err != nil {
return size, err
}
size += n
}
return size, nil
}
func (e *Encoder) encodeOffsets(idx *MemoryIndex) (int, error) {
var size int
for k := 0; k < fanout; k++ {
pos := idx.FanoutMapping[k]
if pos == noMapping {
continue
}
n, err := e.Write(idx.Offset32[pos])
if err != nil {
return size, err
}
size += n
}
if len(idx.Offset64) > 0 {
n, err := e.Write(idx.Offset64)
if err != nil {
return size, err
}
size += n
}
return size, nil
}
func (e *Encoder) encodeChecksums(idx *MemoryIndex) (int, error) {
if _, err := e.Write(idx.PackfileChecksum[:]); err != nil {
return 0, err
}
copy(idx.IdxChecksum[:], e.hash.Sum(nil)[:20])
if _, err := e.Write(idx.IdxChecksum[:]); err != nil {
return 0, err
}
return 40, nil
}
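A small round-trip sketch encoding a previously decoded MemoryIndex back to idx format (reencode is an illustrative helper):

package example

import (
	"io"

	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)

// reencode writes idx to w in idx v2 format, returning the bytes written.
func reencode(w io.Writer, idx *idxfile.MemoryIndex) (int, error) {
	return idxfile.NewEncoder(w).Encode(idx)
}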

View File

@@ -0,0 +1,347 @@
package idxfile
import (
"bytes"
"io"
"sort"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
const (
// VersionSupported is the only idx version supported.
VersionSupported = 2
noMapping = -1
)
var (
idxHeader = []byte{255, 't', 'O', 'c'}
)
// Index represents an index of a packfile.
type Index interface {
// Contains checks whether the given hash is in the index.
Contains(h plumbing.Hash) (bool, error)
// FindOffset finds the offset in the packfile for the object with
// the given hash.
FindOffset(h plumbing.Hash) (int64, error)
// FindCRC32 finds the CRC32 of the object with the given hash.
FindCRC32(h plumbing.Hash) (uint32, error)
// FindHash finds the hash for the object with the given offset.
FindHash(o int64) (plumbing.Hash, error)
// Count returns the number of entries in the index.
Count() (int64, error)
// Entries returns an iterator to retrieve all index entries.
Entries() (EntryIter, error)
// EntriesByOffset returns an iterator to retrieve all index entries ordered
// by offset.
EntriesByOffset() (EntryIter, error)
}
// MemoryIndex is the in memory representation of an idx file.
type MemoryIndex struct {
Version uint32
Fanout [256]uint32
// FanoutMapping maps the position in the fanout table to the position
// in the Names, Offset32 and CRC32 slices. This improves the memory
// usage by not needing an array with unnecessary empty slots.
FanoutMapping [256]int
Names [][]byte
Offset32 [][]byte
CRC32 [][]byte
Offset64 []byte
PackfileChecksum [20]byte
IdxChecksum [20]byte
offsetHash map[int64]plumbing.Hash
}
var _ Index = (*MemoryIndex)(nil)
// NewMemoryIndex returns an instance of a new MemoryIndex.
func NewMemoryIndex() *MemoryIndex {
return &MemoryIndex{}
}
func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) (int, bool) {
k := idx.FanoutMapping[h[0]]
if k == noMapping {
return 0, false
}
if len(idx.Names) <= k {
return 0, false
}
data := idx.Names[k]
high := uint64(len(idx.Offset32[k])) >> 2
if high == 0 {
return 0, false
}
low := uint64(0)
for {
mid := (low + high) >> 1
offset := mid * objectIDLength
cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength])
if cmp < 0 {
high = mid
} else if cmp == 0 {
return int(mid), true
} else {
low = mid + 1
}
if low >= high {
break
}
}
return 0, false
}
// Contains implements the Index interface.
func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) {
_, ok := idx.findHashIndex(h)
return ok, nil
}
// FindOffset implements the Index interface.
func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) {
if len(idx.FanoutMapping) <= int(h[0]) {
return 0, plumbing.ErrObjectNotFound
}
k := idx.FanoutMapping[h[0]]
i, ok := idx.findHashIndex(h)
if !ok {
return 0, plumbing.ErrObjectNotFound
}
return idx.getOffset(k, i)
}
const isO64Mask = uint64(1) << 31
func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) (int64, error) {
offset := secondLevel << 2
buf := bytes.NewBuffer(idx.Offset32[firstLevel][offset : offset+4])
ofs, err := binary.ReadUint32(buf)
if err != nil {
return -1, err
}
if (uint64(ofs) & isO64Mask) != 0 {
offset := 8 * (uint64(ofs) & ^isO64Mask)
buf := bytes.NewBuffer(idx.Offset64[offset : offset+8])
n, err := binary.ReadUint64(buf)
if err != nil {
return -1, err
}
return int64(n), nil
}
return int64(ofs), nil
}
// FindCRC32 implements the Index interface.
func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) {
k := idx.FanoutMapping[h[0]]
i, ok := idx.findHashIndex(h)
if !ok {
return 0, plumbing.ErrObjectNotFound
}
return idx.getCRC32(k, i)
}
func (idx *MemoryIndex) getCRC32(firstLevel, secondLevel int) (uint32, error) {
offset := secondLevel << 2
buf := bytes.NewBuffer(idx.CRC32[firstLevel][offset : offset+4])
return binary.ReadUint32(buf)
}
// FindHash implements the Index interface.
func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) {
// Lazily generate the reverse offset/hash map if required.
if idx.offsetHash == nil {
if err := idx.genOffsetHash(); err != nil {
return plumbing.ZeroHash, err
}
}
hash, ok := idx.offsetHash[o]
if !ok {
return plumbing.ZeroHash, plumbing.ErrObjectNotFound
}
return hash, nil
}
// genOffsetHash generates the offset/hash mapping for reverse search.
func (idx *MemoryIndex) genOffsetHash() error {
count, err := idx.Count()
if err != nil {
return err
}
idx.offsetHash = make(map[int64]plumbing.Hash, count)
iter, err := idx.Entries()
if err != nil {
return err
}
for {
entry, err := iter.Next()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
idx.offsetHash[int64(entry.Offset)] = entry.Hash
}
}
// Count implements the Index interface.
func (idx *MemoryIndex) Count() (int64, error) {
return int64(idx.Fanout[fanout-1]), nil
}
// Entries implements the Index interface.
func (idx *MemoryIndex) Entries() (EntryIter, error) {
return &idxfileEntryIter{idx, 0, 0, 0}, nil
}
// EntriesByOffset implements the Index interface.
func (idx *MemoryIndex) EntriesByOffset() (EntryIter, error) {
count, err := idx.Count()
if err != nil {
return nil, err
}
iter := &idxfileEntryOffsetIter{
entries: make(entriesByOffset, count),
}
entries, err := idx.Entries()
if err != nil {
return nil, err
}
for pos := 0; int64(pos) < count; pos++ {
entry, err := entries.Next()
if err != nil {
return nil, err
}
iter.entries[pos] = entry
}
sort.Sort(iter.entries)
return iter, nil
}
// EntryIter is an iterator that will return the entries in a packfile index.
type EntryIter interface {
// Next returns the next entry in the packfile index.
Next() (*Entry, error)
// Close closes the iterator.
Close() error
}
type idxfileEntryIter struct {
idx *MemoryIndex
total int
firstLevel, secondLevel int
}
func (i *idxfileEntryIter) Next() (*Entry, error) {
for {
if i.firstLevel >= fanout {
return nil, io.EOF
}
if i.total >= int(i.idx.Fanout[i.firstLevel]) {
i.firstLevel++
i.secondLevel = 0
continue
}
entry := new(Entry)
ofs := i.secondLevel * objectIDLength
copy(entry.Hash[:], i.idx.Names[i.idx.FanoutMapping[i.firstLevel]][ofs:])
pos := i.idx.FanoutMapping[entry.Hash[0]]
offset, err := i.idx.getOffset(pos, i.secondLevel)
if err != nil {
return nil, err
}
entry.Offset = uint64(offset)
entry.CRC32, err = i.idx.getCRC32(pos, i.secondLevel)
if err != nil {
return nil, err
}
i.secondLevel++
i.total++
return entry, nil
}
}
func (i *idxfileEntryIter) Close() error {
i.firstLevel = fanout
return nil
}
// Entry is the in memory representation of an object entry in the idx file.
type Entry struct {
Hash plumbing.Hash
CRC32 uint32
Offset uint64
}
type idxfileEntryOffsetIter struct {
entries entriesByOffset
pos int
}
func (i *idxfileEntryOffsetIter) Next() (*Entry, error) {
if i.pos >= len(i.entries) {
return nil, io.EOF
}
entry := i.entries[i.pos]
i.pos++
return entry, nil
}
func (i *idxfileEntryOffsetIter) Close() error {
i.pos = len(i.entries) + 1
return nil
}
type entriesByOffset []*Entry
func (o entriesByOffset) Len() int {
return len(o)
}
func (o entriesByOffset) Less(i int, j int) bool {
return o[i].Offset < o[j].Offset
}
func (o entriesByOffset) Swap(i int, j int) {
o[i], o[j] = o[j], o[i]
}
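An iteration sketch over any Index implementation (dumpEntries is an illustrative helper):

package example

import (
	"fmt"
	"io"

	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)

// dumpEntries prints every hash/offset pair until the iterator is drained.
func dumpEntries(idx idxfile.Index) error {
	iter, err := idx.Entries()
	if err != nil {
		return err
	}
	defer iter.Close()
	for {
		e, err := iter.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		fmt.Printf("%s at offset %d\n", e.Hash, e.Offset)
	}
}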

View File

@@ -0,0 +1,186 @@
package idxfile
import (
"bytes"
"fmt"
"math"
"sort"
"sync"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
// objects implements sort.Interface and uses hash as sorting key.
type objects []Entry
// Writer implements a packfile Observer interface and is used to generate
// indexes.
type Writer struct {
m sync.Mutex
count uint32
checksum plumbing.Hash
objects objects
offset64 uint32
finished bool
index *MemoryIndex
added map[plumbing.Hash]struct{}
}
// Index returns a previously created MemoryIndex or creates a new one if
// needed.
func (w *Writer) Index() (*MemoryIndex, error) {
w.m.Lock()
defer w.m.Unlock()
if w.index == nil {
return w.createIndex()
}
return w.index, nil
}
// Add appends new object data.
func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) {
w.m.Lock()
defer w.m.Unlock()
if w.added == nil {
w.added = make(map[plumbing.Hash]struct{})
}
if _, ok := w.added[h]; !ok {
w.added[h] = struct{}{}
w.objects = append(w.objects, Entry{h, crc, pos})
}
}
func (w *Writer) Finished() bool {
return w.finished
}
// OnHeader implements packfile.Observer interface.
func (w *Writer) OnHeader(count uint32) error {
w.count = count
w.objects = make(objects, 0, count)
return nil
}
// OnInflatedObjectHeader implements packfile.Observer interface.
func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error {
return nil
}
// OnInflatedObjectContent implements packfile.Observer interface.
func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, _ []byte) error {
w.Add(h, uint64(pos), crc)
return nil
}
// OnFooter implements packfile.Observer interface.
func (w *Writer) OnFooter(h plumbing.Hash) error {
w.checksum = h
w.finished = true
_, err := w.createIndex()
if err != nil {
return err
}
return nil
}
// createIndex returns a MemoryIndex filled with the information gathered by
// the observer callbacks.
func (w *Writer) createIndex() (*MemoryIndex, error) {
if !w.finished {
return nil, fmt.Errorf("the index still hasn't finished building")
}
idx := new(MemoryIndex)
w.index = idx
sort.Sort(w.objects)
// unmap all fans by default
for i := range idx.FanoutMapping {
idx.FanoutMapping[i] = noMapping
}
buf := new(bytes.Buffer)
last := -1
bucket := -1
for i, o := range w.objects {
fan := o.Hash[0]
// fill the gaps between fans
for j := last + 1; j < int(fan); j++ {
idx.Fanout[j] = uint32(i)
}
// update the number of objects for this position
idx.Fanout[fan] = uint32(i + 1)
// we move from one bucket to another, update counters and allocate
// memory
if last != int(fan) {
bucket++
idx.FanoutMapping[fan] = bucket
last = int(fan)
idx.Names = append(idx.Names, make([]byte, 0))
idx.Offset32 = append(idx.Offset32, make([]byte, 0))
idx.CRC32 = append(idx.CRC32, make([]byte, 0))
}
idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...)
offset := o.Offset
if offset > math.MaxInt32 {
offset = w.addOffset64(offset)
}
buf.Truncate(0)
binary.WriteUint32(buf, uint32(offset))
idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...)
buf.Truncate(0)
binary.WriteUint32(buf, uint32(o.CRC32))
idx.CRC32[bucket] = append(idx.CRC32[bucket], buf.Bytes()...)
}
for j := last + 1; j < 256; j++ {
idx.Fanout[j] = uint32(len(w.objects))
}
idx.Version = VersionSupported
idx.PackfileChecksum = w.checksum
return idx, nil
}
func (w *Writer) addOffset64(pos uint64) uint64 {
buf := new(bytes.Buffer)
binary.WriteUint64(buf, pos)
w.index.Offset64 = append(w.index.Offset64, buf.Bytes()...)
index := uint64(w.offset64 | (1 << 31))
w.offset64++
return index
}
func (o objects) Len() int {
return len(o)
}
func (o objects) Less(i int, j int) bool {
cmp := bytes.Compare(o[i].Hash[:], o[j].Hash[:])
return cmp < 0
}
func (o objects) Swap(i int, j int) {
o[i], o[j] = o[j], o[i]
}
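A sketch of driving the Writer by hand, outside of a packfile parser (buildIndex, the offsets map and the zero CRC are illustrative):

package example

import (
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)

// buildIndex replays the observer callbacks that a packfile parser
// would normally invoke, then asks for the finished index.
func buildIndex(offsets map[plumbing.Hash]uint64, checksum plumbing.Hash) (*idxfile.MemoryIndex, error) {
	var w idxfile.Writer
	if err := w.OnHeader(uint32(len(offsets))); err != nil {
		return nil, err
	}
	for h, off := range offsets {
		w.Add(h, off, 0) // CRC32 omitted in this sketch
	}
	if err := w.OnFooter(checksum); err != nil { // marks the writer as finished
		return nil, err
	}
	return w.Index()
}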

View File

@@ -0,0 +1,476 @@
package index
import (
"bytes"
"crypto/sha1"
"errors"
"hash"
"io"
"io/ioutil"
"strconv"
"time"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
var (
// DecodeVersionSupported is the range of supported index versions
DecodeVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4}
// ErrMalformedSignature is returned by Decode when the index header is
// malformed
ErrMalformedSignature = errors.New("malformed index signature file")
// ErrInvalidChecksum is returned by Decode if the SHA1 hash does not match
// the content read
ErrInvalidChecksum = errors.New("invalid checksum")
errUnknownExtension = errors.New("unknown extension")
)
const (
entryHeaderLength = 62
entryExtended = 0x4000
entryValid = 0x8000
nameMask = 0xfff
intentToAddMask = 1 << 13
skipWorkTreeMask = 1 << 14
)
// A Decoder reads and decodes index files from an input stream.
type Decoder struct {
r io.Reader
hash hash.Hash
lastEntry *Entry
}
// NewDecoder returns a new decoder that reads from r.
func NewDecoder(r io.Reader) *Decoder {
h := sha1.New()
return &Decoder{
r: io.TeeReader(r, h),
hash: h,
}
}
// Decode reads the whole index object from its input and stores it in the
// value pointed to by idx.
func (d *Decoder) Decode(idx *Index) error {
var err error
idx.Version, err = validateHeader(d.r)
if err != nil {
return err
}
entryCount, err := binary.ReadUint32(d.r)
if err != nil {
return err
}
if err := d.readEntries(idx, int(entryCount)); err != nil {
return err
}
return d.readExtensions(idx)
}
func (d *Decoder) readEntries(idx *Index, count int) error {
for i := 0; i < count; i++ {
e, err := d.readEntry(idx)
if err != nil {
return err
}
d.lastEntry = e
idx.Entries = append(idx.Entries, e)
}
return nil
}
func (d *Decoder) readEntry(idx *Index) (*Entry, error) {
e := &Entry{}
var msec, mnsec, sec, nsec uint32
var flags uint16
flow := []interface{}{
&sec, &nsec,
&msec, &mnsec,
&e.Dev,
&e.Inode,
&e.Mode,
&e.UID,
&e.GID,
&e.Size,
&e.Hash,
&flags,
}
if err := binary.Read(d.r, flow...); err != nil {
return nil, err
}
read := entryHeaderLength
if sec != 0 || nsec != 0 {
e.CreatedAt = time.Unix(int64(sec), int64(nsec))
}
if msec != 0 || mnsec != 0 {
e.ModifiedAt = time.Unix(int64(msec), int64(mnsec))
}
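// Bits 12-13 of the flags field carry the merge stage; the low 12 bits
// carry the name length (see the package documentation on the index format).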
e.Stage = Stage(flags>>12) & 0x3
if flags&entryExtended != 0 {
extended, err := binary.ReadUint16(d.r)
if err != nil {
return nil, err
}
read += 2
e.IntentToAdd = extended&intentToAddMask != 0
e.SkipWorktree = extended&skipWorkTreeMask != 0
}
if err := d.readEntryName(idx, e, flags); err != nil {
return nil, err
}
return e, d.padEntry(idx, e, read)
}
func (d *Decoder) readEntryName(idx *Index, e *Entry, flags uint16) error {
var name string
var err error
switch idx.Version {
case 2, 3:
length := flags & nameMask
name, err = d.doReadEntryName(length)
case 4:
name, err = d.doReadEntryNameV4()
default:
return ErrUnsupportedVersion
}
if err != nil {
return err
}
e.Name = name
return nil
}
func (d *Decoder) doReadEntryNameV4() (string, error) {
l, err := binary.ReadVariableWidthInt(d.r)
if err != nil {
return "", err
}
var base string
if d.lastEntry != nil {
base = d.lastEntry.Name[:len(d.lastEntry.Name)-int(l)]
}
name, err := binary.ReadUntil(d.r, '\x00')
if err != nil {
return "", err
}
return base + string(name), nil
}
func (d *Decoder) doReadEntryName(length uint16) (string, error) {
name := make([]byte, length)
if err := binary.Read(d.r, &name); err != nil {
return "", err
}
return string(name), nil
}
// Index entries are padded out to the next 8 byte alignment
// for historical reasons related to how C Git read the files.
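// For example, an entry named "foo" occupies 62 header bytes plus 3 name
// bytes (65 total), so 7 NUL bytes round it up to 72; an entry that is
// already 8-byte aligned still receives 8 NUL bytes, keeping the name
// NUL-terminated.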
func (d *Decoder) padEntry(idx *Index, e *Entry, read int) error {
if idx.Version == 4 {
return nil
}
entrySize := read + len(e.Name)
padLen := 8 - entrySize%8
_, err := io.CopyN(ioutil.Discard, d.r, int64(padLen))
return err
}
func (d *Decoder) readExtensions(idx *Index) error {
// TODO: support the 'Split index' and 'Untracked cache' extensions; keep in
// mind that they are not supported by JGit or libgit2
var expected []byte
var err error
var header [4]byte
for {
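// Snapshot the running hash before reading the next four bytes: if they
// turn out to be the trailing checksum rather than an extension signature,
// this snapshot is the expected SHA-1 of everything read so far.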
expected = d.hash.Sum(nil)
var n int
if n, err = io.ReadFull(d.r, header[:]); err != nil {
if n == 0 {
err = io.EOF
}
break
}
err = d.readExtension(idx, header[:])
if err != nil {
break
}
}
if err != errUnknownExtension {
return err
}
return d.readChecksum(expected, header)
}
func (d *Decoder) readExtension(idx *Index, header []byte) error {
switch {
case bytes.Equal(header, treeExtSignature):
r, err := d.getExtensionReader()
if err != nil {
return err
}
idx.Cache = &Tree{}
d := &treeExtensionDecoder{r}
if err := d.Decode(idx.Cache); err != nil {
return err
}
case bytes.Equal(header, resolveUndoExtSignature):
r, err := d.getExtensionReader()
if err != nil {
return err
}
idx.ResolveUndo = &ResolveUndo{}
d := &resolveUndoDecoder{r}
if err := d.Decode(idx.ResolveUndo); err != nil {
return err
}
case bytes.Equal(header, endOfIndexEntryExtSignature):
r, err := d.getExtensionReader()
if err != nil {
return err
}
idx.EndOfIndexEntry = &EndOfIndexEntry{}
d := &endOfIndexEntryDecoder{r}
if err := d.Decode(idx.EndOfIndexEntry); err != nil {
return err
}
default:
return errUnknownExtension
}
return nil
}
func (d *Decoder) getExtensionReader() (io.Reader, error) {
length, err := binary.ReadUint32(d.r)
if err != nil {
return nil, err
}
return &io.LimitedReader{R: d.r, N: int64(length)}, nil
}
func (d *Decoder) readChecksum(expected []byte, alreadyRead [4]byte) error {
var h plumbing.Hash
copy(h[:4], alreadyRead[:])
if err := binary.Read(d.r, h[4:]); err != nil {
return err
}
if !bytes.Equal(h[:], expected) {
return ErrInvalidChecksum
}
return nil
}
func validateHeader(r io.Reader) (version uint32, err error) {
var s = make([]byte, 4)
if _, err := io.ReadFull(r, s); err != nil {
return 0, err
}
if !bytes.Equal(s, indexSignature) {
return 0, ErrMalformedSignature
}
version, err = binary.ReadUint32(r)
if err != nil {
return 0, err
}
if version < DecodeVersionSupported.Min || version > DecodeVersionSupported.Max {
return 0, ErrUnsupportedVersion
}
return
}
type treeExtensionDecoder struct {
r io.Reader
}
func (d *treeExtensionDecoder) Decode(t *Tree) error {
for {
e, err := d.readEntry()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
if e == nil {
continue
}
t.Entries = append(t.Entries, *e)
}
}
func (d *treeExtensionDecoder) readEntry() (*TreeEntry, error) {
e := &TreeEntry{}
path, err := binary.ReadUntil(d.r, '\x00')
if err != nil {
return nil, err
}
e.Path = string(path)
count, err := binary.ReadUntil(d.r, ' ')
if err != nil {
return nil, err
}
i, err := strconv.Atoi(string(count))
if err != nil {
return nil, err
}
// An entry can be in an invalidated state and is represented by having a
// negative number in the entry_count field.
if i == -1 {
return nil, nil
}
e.Entries = i
trees, err := binary.ReadUntil(d.r, '\n')
if err != nil {
return nil, err
}
i, err = strconv.Atoi(string(trees))
if err != nil {
return nil, err
}
e.Trees = i
if err := binary.Read(d.r, &e.Hash); err != nil {
return nil, err
}
return e, nil
}
type resolveUndoDecoder struct {
r io.Reader
}
func (d *resolveUndoDecoder) Decode(ru *ResolveUndo) error {
for {
e, err := d.readEntry()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
ru.Entries = append(ru.Entries, *e)
}
}
func (d *resolveUndoDecoder) readEntry() (*ResolveUndoEntry, error) {
e := &ResolveUndoEntry{
Stages: make(map[Stage]plumbing.Hash),
}
path, err := binary.ReadUntil(d.r, '\x00')
if err != nil {
return nil, err
}
e.Path = string(path)
for i := 0; i < 3; i++ {
if err := d.readStage(e, Stage(i+1)); err != nil {
return nil, err
}
}
for s := range e.Stages {
var hash plumbing.Hash
if err := binary.Read(d.r, hash[:]); err != nil {
return nil, err
}
e.Stages[s] = hash
}
return e, nil
}
func (d *resolveUndoDecoder) readStage(e *ResolveUndoEntry, s Stage) error {
ascii, err := binary.ReadUntil(d.r, '\x00')
if err != nil {
return err
}
stage, err := strconv.ParseInt(string(ascii), 8, 64)
if err != nil {
return err
}
if stage != 0 {
e.Stages[s] = plumbing.ZeroHash
}
return nil
}
type endOfIndexEntryDecoder struct {
r io.Reader
}
func (d *endOfIndexEntryDecoder) Decode(e *EndOfIndexEntry) error {
var err error
e.Offset, err = binary.ReadUint32(d.r)
if err != nil {
return err
}
return binary.Read(d.r, &e.Hash)
}

View File

@ -0,0 +1,360 @@
// Package index implements encoding and decoding of index format files.
//
// Git index format
// ================
//
// == The Git index file has the following format
//
// All binary numbers are in network byte order. Version 2 is described
// here unless stated otherwise.
//
// - A 12-byte header consisting of
//
// 4-byte signature:
// The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
//
// 4-byte version number:
// The current supported versions are 2, 3 and 4.
//
// 32-bit number of index entries.
//
// - A number of sorted index entries (see below).
//
// - Extensions
//
// Extensions are identified by signature. Optional extensions can
// be ignored if Git does not understand them.
//
// Git currently supports cached tree and resolve undo extensions.
//
// 4-byte extension signature. If the first byte is 'A'..'Z' the
// extension is optional and can be ignored.
//
// 32-bit size of the extension
//
// Extension data
//
// - 160-bit SHA-1 over the content of the index file before this
// checksum.
//
// == Index entry
//
// Index entries are sorted in ascending order on the name field,
// interpreted as a string of unsigned bytes (i.e. memcmp() order, no
// localization, no special casing of directory separator '/'). Entries
// with the same name are sorted by their stage field.
//
// 32-bit ctime seconds, the last time a file's metadata changed
// this is stat(2) data
//
// 32-bit ctime nanosecond fractions
// this is stat(2) data
//
// 32-bit mtime seconds, the last time a file's data changed
// this is stat(2) data
//
// 32-bit mtime nanosecond fractions
// this is stat(2) data
//
// 32-bit dev
// this is stat(2) data
//
// 32-bit ino
// this is stat(2) data
//
// 32-bit mode, split into (high to low bits)
//
// 4-bit object type
// valid values in binary are 1000 (regular file), 1010 (symbolic link)
// and 1110 (gitlink)
//
// 3-bit unused
//
// 9-bit unix permission. Only 0755 and 0644 are valid for regular files.
// Symbolic links and gitlinks have value 0 in this field.
//
// 32-bit uid
// this is stat(2) data
//
// 32-bit gid
// this is stat(2) data
//
// 32-bit file size
// This is the on-disk size from stat(2), truncated to 32-bit.
//
// 160-bit SHA-1 for the represented object
//
// A 16-bit 'flags' field split into (high to low bits)
//
// 1-bit assume-valid flag
//
// 1-bit extended flag (must be zero in version 2)
//
// 2-bit stage (during merge)
//
// 12-bit name length if the length is less than 0xFFF; otherwise 0xFFF
// is stored in this field.
//
// (Version 3 or later) A 16-bit field, only applicable if the
// "extended flag" above is 1, split into (high to low bits).
//
// 1-bit reserved for future
//
// 1-bit skip-worktree flag (used by sparse checkout)
//
// 1-bit intent-to-add flag (used by "git add -N")
//
// 13-bit unused, must be zero
//
// Entry path name (variable length) relative to top level directory
// (without leading slash). '/' is used as path separator. The special
// path components ".", ".." and ".git" (without quotes) are disallowed.
// Trailing slash is also disallowed.
//
// The exact encoding is undefined, but the '.' and '/' characters
// are encoded in 7-bit ASCII and the encoding cannot contain a NUL
// byte (iow, this is a UNIX pathname).
//
// (Version 4) In version 4, the entry path name is prefix-compressed
// relative to the path name for the previous entry (the very first
// entry is encoded as if the path name for the previous entry is an
// empty string). At the beginning of an entry, an integer N in the
// variable width encoding (the same encoding as the offset is encoded
// for OFS_DELTA pack entries; see pack-format.txt) is stored, followed
// by a NUL-terminated string S. Removing N bytes from the end of the
// path name for the previous entry, and replacing it with the string S
// yields the path name for this entry.
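//
//   For example (illustrative): if the previous entry's path is "src/main.c"
//   and this entry stores N=1 followed by the string "h", the decoder strips
//   one byte from "src/main.c" and appends "h", yielding "src/main.h".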
//
// 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes
// while keeping the name NUL-terminated.
//
// (Version 4) In version 4, the padding after the pathname does not
// exist.
//
// Interpretation of index entries in split index mode is completely
// different. See below for details.
//
// == Extensions
//
// === Cached tree
//
// Cached tree extension contains pre-computed hashes for trees that can
// be derived from the index. It helps speed up tree object generation
// from index for a new commit.
//
// When a path is updated in index, the path must be invalidated and
// removed from tree cache.
//
// The signature for this extension is { 'T', 'R', 'E', 'E' }.
//
// A series of entries fill the entire extension; each of which
// consists of:
//
// - NUL-terminated path component (relative to its parent directory);
//
// - ASCII decimal number of entries in the index that is covered by the
// tree this entry represents (entry_count);
//
// - A space (ASCII 32);
//
// - ASCII decimal number that represents the number of subtrees this
// tree has;
//
// - A newline (ASCII 10); and
//
// - 160-bit object name for the object that would result from writing
// this span of index as a tree.
//
// An entry can be in an invalidated state and is represented by having
// a negative number in the entry_count field. In this case, there is no
// object name and the next entry starts immediately after the newline.
// When writing an invalid entry, -1 should always be used as entry_count.
//
// The entries are written out in the top-down, depth-first order. The
// first entry represents the root level of the repository, followed by the
// first subtree--let's call this A--of the root level (with its name
// relative to the root level), followed by the first subtree of A (with
// its name relative to A), ...
//
// === Resolve undo
//
// A conflict is represented in the index as a set of higher stage entries.
// When a conflict is resolved (e.g. with "git add path"), these higher
// stage entries will be removed and a stage-0 entry with proper resolution
// is added.
//
// When these higher stage entries are removed, they are saved in the
// resolve undo extension, so that conflicts can be recreated (e.g. with
// "git checkout -m"), in case users want to redo a conflict resolution
// from scratch.
//
// The signature for this extension is { 'R', 'E', 'U', 'C' }.
//
// A series of entries fill the entire extension; each of which
// consists of:
//
// - NUL-terminated pathname the entry describes (relative to the root of
// the repository, i.e. full pathname);
//
// - Three NUL-terminated ASCII octal numbers, entry mode of entries in
// stage 1 to 3 (a missing stage is represented by "0" in this field);
// and
//
// - At most three 160-bit object names of the entry in stages from 1 to 3
// (nothing is written for a missing stage).
//
// === Split index
//
// In split index mode, the majority of index entries could be stored
// in a separate file. This extension records the changes to be made on
// top of that to produce the final index.
//
// The signature for this extension is { 'l', 'i', 'n', 'k' }.
//
// The extension consists of:
//
// - 160-bit SHA-1 of the shared index file. The shared index file path
// is $GIT_DIR/sharedindex.<SHA-1>. If all 160 bits are zero, the
// index does not require a shared index file.
//
// - An ewah-encoded delete bitmap, each bit represents an entry in the
// shared index. If a bit is set, its corresponding entry in the
// shared index will be removed from the final index. Note, because
// a delete operation changes index entry positions, but we do need
// original positions in replace phase, it's best to just mark
// entries for removal, then do a mass deletion after replacement.
//
// - An ewah-encoded replace bitmap, each bit represents an entry in
// the shared index. If a bit is set, its corresponding entry in the
// shared index will be replaced with an entry in this index
// file. All replaced entries are stored in sorted order in this
// index. The first "1" bit in the replace bitmap corresponds to the
// first index entry, the second "1" bit to the second entry and so
// on. Replaced entries may have empty path names to save space.
//
// The remaining index entries after replaced ones will be added to the
// final index. These added entries are also sorted by entry name then
// stage.
//
// == Untracked cache
//
// Untracked cache saves the untracked file list and necessary data to
// verify the cache. The signature for this extension is { 'U', 'N',
// 'T', 'R' }.
//
// The extension starts with
//
// - A sequence of NUL-terminated strings, preceded by the size of the
// sequence in variable width encoding. Each string describes the
// environment where the cache can be used.
//
// - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from
// ctime field until "file size".
//
// - Stat data of plumbing.excludesfile
//
// - 32-bit dir_flags (see struct dir_struct)
//
// - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file
// does not exist.
//
// - 160-bit SHA-1 of plumbing.excludesfile. Null SHA-1 means the file does
// not exist.
//
// - NUL-terminated string of per-dir exclude file name. This usually
// is ".gitignore".
//
// - The number of following directory blocks, variable width
// encoding. If this number is zero, the extension ends here with a
// following NUL.
//
// - A number of directory blocks in depth-first-search order, each
// consists of
//
// - The number of untracked entries, variable width encoding.
//
// - The number of sub-directory blocks, variable width encoding.
//
// - The directory name terminated by NUL.
//
// - A number of untracked file/dir names terminated by NUL.
//
// The remaining data of each directory block is grouped by type:
//
// - An ewah bitmap, the n-th bit marks whether the n-th directory has
// valid untracked cache entries.
//
// - An ewah bitmap, the n-th bit records "check-only" bit of
// read_directory_recursive() for the n-th directory.
//
// - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data
// is valid for the n-th directory and exists in the next data.
//
// - An array of stat data. The n-th data corresponds with the n-th
// "one" bit in the previous ewah bitmap.
//
// - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit
// in the previous ewah bitmap.
//
// - One NUL.
//
// == File System Monitor cache
//
// The file system monitor cache tracks files for which the core.fsmonitor
// hook has told us about changes. The signature for this extension is
// { 'F', 'S', 'M', 'N' }.
//
// The extension starts with
//
// - 32-bit version number: the current supported version is 1.
//
// - 64-bit time: the extension data reflects all changes through the given
// time which is stored as the nanoseconds elapsed since midnight,
// January 1, 1970.
//
// - 32-bit bitmap size: the size of the CE_FSMONITOR_VALID bitmap.
//
// - An ewah bitmap, the n-th bit indicates whether the n-th index entry
// is not CE_FSMONITOR_VALID.
//
// == End of Index Entry
//
// The End of Index Entry (EOIE) is used to locate the end of the variable
// length index entries and the beginning of the extensions. Code can take
// advantage of this to quickly locate the index extensions without having
// to parse through all of the index entries.
//
// Because it must be able to be loaded before the variable length cache
// entries and other index extensions, this extension must be written last.
// The signature for this extension is { 'E', 'O', 'I', 'E' }.
//
// The extension consists of:
//
// - 32-bit offset to the end of the index entries
//
// - 160-bit SHA-1 over the extension types and their sizes (but not
// their contents). E.g. if we have "TREE" extension that is N-bytes
// long, "REUC" extension that is M-bytes long, followed by "EOIE",
// then the hash would be:
//
// SHA-1("TREE" + <binary representation of N> +
// "REUC" + <binary representation of M>)
//
// == Index Entry Offset Table
//
// The Index Entry Offset Table (IEOT) is used to help address the CPU
// cost of loading the index by enabling multi-threading the process of
// converting cache entries from the on-disk format to the in-memory format.
// The signature for this extension is { 'I', 'E', 'O', 'T' }.
//
// The extension consists of:
//
// - 32-bit version (currently 1)
//
// - A number of index offset entries each consisting of:
//
// - 32-bit offset from the beginning of the file to the first cache entry
// in this block of entries.
//
// - 32-bit count of cache entries in this block
package index

View File

@ -0,0 +1,150 @@
package index
import (
"bytes"
"crypto/sha1"
"errors"
"hash"
"io"
"sort"
"time"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
var (
// EncodeVersionSupported is the index version supported by the encoder
EncodeVersionSupported uint32 = 2
// ErrInvalidTimestamp is returned by Encode if the Index contains an Entry
// with negative timestamp values
ErrInvalidTimestamp = errors.New("negative timestamps are not allowed")
)
// An Encoder writes an Index to an output stream.
type Encoder struct {
w io.Writer
hash hash.Hash
}
// NewEncoder returns a new encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
h := sha1.New()
mw := io.MultiWriter(w, h)
return &Encoder{mw, h}
}
// Encode writes the Index to the stream of the encoder.
func (e *Encoder) Encode(idx *Index) error {
// TODO: support versions v3 and v4
// TODO: support extensions
if idx.Version != EncodeVersionSupported {
return ErrUnsupportedVersion
}
if err := e.encodeHeader(idx); err != nil {
return err
}
if err := e.encodeEntries(idx); err != nil {
return err
}
return e.encodeFooter()
}
func (e *Encoder) encodeHeader(idx *Index) error {
return binary.Write(e.w,
indexSignature,
idx.Version,
uint32(len(idx.Entries)),
)
}
func (e *Encoder) encodeEntries(idx *Index) error {
sort.Sort(byName(idx.Entries))
for _, entry := range idx.Entries {
if err := e.encodeEntry(entry); err != nil {
return err
}
wrote := entryHeaderLength + len(entry.Name)
if err := e.padEntry(wrote); err != nil {
return err
}
}
return nil
}
func (e *Encoder) encodeEntry(entry *Entry) error {
if entry.IntentToAdd || entry.SkipWorktree {
return ErrUnsupportedVersion
}
sec, nsec, err := e.timeToUint32(&entry.CreatedAt)
if err != nil {
return err
}
msec, mnsec, err := e.timeToUint32(&entry.ModifiedAt)
if err != nil {
return err
}
flags := uint16(entry.Stage&0x3) << 12
if l := len(entry.Name); l < nameMask {
flags |= uint16(l)
} else {
flags |= nameMask
}
flow := []interface{}{
sec, nsec,
msec, mnsec,
entry.Dev,
entry.Inode,
entry.Mode,
entry.UID,
entry.GID,
entry.Size,
entry.Hash[:],
flags,
}
if err := binary.Write(e.w, flow...); err != nil {
return err
}
return binary.Write(e.w, []byte(entry.Name))
}
func (e *Encoder) timeToUint32(t *time.Time) (uint32, uint32, error) {
if t.IsZero() {
return 0, 0, nil
}
if t.Unix() < 0 || t.UnixNano() < 0 {
return 0, 0, ErrInvalidTimestamp
}
return uint32(t.Unix()), uint32(t.Nanosecond()), nil
}
func (e *Encoder) padEntry(wrote int) error {
padLen := 8 - wrote%8
_, err := e.w.Write(bytes.Repeat([]byte{'\x00'}, padLen))
return err
}
func (e *Encoder) encodeFooter() error {
return binary.Write(e.w, e.hash.Sum(nil))
}
type byName []*Entry
func (l byName) Len() int { return len(l) }
func (l byName) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l byName) Less(i, j int) bool { return l[i].Name < l[j].Name }

View File

@ -0,0 +1,213 @@
package index
import (
"bytes"
"errors"
"fmt"
"path/filepath"
"time"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
)
var (
// ErrUnsupportedVersion is returned by Decode when the index file version
// is not supported.
ErrUnsupportedVersion = errors.New("unsupported version")
// ErrEntryNotFound is returned by Index.Entry if an entry is not found.
ErrEntryNotFound = errors.New("entry not found")
indexSignature = []byte{'D', 'I', 'R', 'C'}
treeExtSignature = []byte{'T', 'R', 'E', 'E'}
resolveUndoExtSignature = []byte{'R', 'E', 'U', 'C'}
endOfIndexEntryExtSignature = []byte{'E', 'O', 'I', 'E'}
)
// Stage during merge
type Stage int
const (
// Merged is the default stage, fully merged
Merged Stage = 1
// AncestorMode is the base revision
AncestorMode Stage = 1
// OurMode is the first tree revision, ours
OurMode Stage = 2
// TheirMode is the second tree revision, theirs
TheirMode Stage = 3
)
// Index contains the information about which objects are currently checked
// out in the worktree, with details about the working files. Changes in the
// worktree are detected using this Index. The Index is also used during
// merges.
type Index struct {
// Version is the index version
Version uint32
// Entries is the collection of entries represented by this Index. The order
// of this collection is not guaranteed
Entries []*Entry
// Cache represents the 'Cached tree' extension
Cache *Tree
// ResolveUndo represents the 'Resolve undo' extension
ResolveUndo *ResolveUndo
// EndOfIndexEntry represents the 'End of Index Entry' extension
EndOfIndexEntry *EndOfIndexEntry
}
// Add creates a new Entry and returns it. The caller should first check that
// another entry with the same path does not exist.
func (i *Index) Add(path string) *Entry {
e := &Entry{
Name: filepath.ToSlash(path),
}
i.Entries = append(i.Entries, e)
return e
}
// Entry returns the entry that matches the given path, if any.
func (i *Index) Entry(path string) (*Entry, error) {
path = filepath.ToSlash(path)
for _, e := range i.Entries {
if e.Name == path {
return e, nil
}
}
return nil, ErrEntryNotFound
}
// Remove removes the entry that matches the given path and returns the
// deleted entry.
func (i *Index) Remove(path string) (*Entry, error) {
path = filepath.ToSlash(path)
for index, e := range i.Entries {
if e.Name == path {
i.Entries = append(i.Entries[:index], i.Entries[index+1:]...)
return e, nil
}
}
return nil, ErrEntryNotFound
}
// Glob returns all entries matching pattern, or nil if there is no matching
// entry. The syntax of patterns is the same as in filepath.Glob.
func (i *Index) Glob(pattern string) (matches []*Entry, err error) {
pattern = filepath.ToSlash(pattern)
for _, e := range i.Entries {
m, err := match(pattern, e.Name)
if err != nil {
return nil, err
}
if m {
matches = append(matches, e)
}
}
return
}
// String is equivalent to `git ls-files --stage --debug`
func (i *Index) String() string {
buf := bytes.NewBuffer(nil)
for _, e := range i.Entries {
buf.WriteString(e.String())
}
return buf.String()
}
// Entry represents a single file (or stage of a file) in the cache. An entry
// represents exactly one stage of a file. If a file path is unmerged then
// multiple Entry instances may appear for the same path name.
type Entry struct {
// Hash is the SHA1 of the represented file
Hash plumbing.Hash
// Name is the Entry path name relative to top level directory
Name string
// CreatedAt time when the tracked path was created
CreatedAt time.Time
// ModifiedAt time when the tracked path was changed
ModifiedAt time.Time
// Dev and Inode of the tracked path
Dev, Inode uint32
// Mode of the path
Mode filemode.FileMode
// UID and GID, userid and group id of the owner
UID, GID uint32
// Size is the length in bytes for regular files
Size uint32
// Stage defines, during a merge, which stage this entry represents
// https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging
Stage Stage
// SkipWorktree used in sparse checkouts
// https://git-scm.com/docs/git-read-tree#_sparse_checkout
SkipWorktree bool
// IntentToAdd records only the fact that the path will be added later
// https://git-scm.com/docs/git-add ("git add -N")
IntentToAdd bool
}
func (e Entry) String() string {
buf := bytes.NewBuffer(nil)
fmt.Fprintf(buf, "%06o %s %d\t%s\n", e.Mode, e.Hash, e.Stage, e.Name)
fmt.Fprintf(buf, " ctime: %d:%d\n", e.CreatedAt.Unix(), e.CreatedAt.Nanosecond())
fmt.Fprintf(buf, " mtime: %d:%d\n", e.ModifiedAt.Unix(), e.ModifiedAt.Nanosecond())
fmt.Fprintf(buf, " dev: %d\tino: %d\n", e.Dev, e.Inode)
fmt.Fprintf(buf, " uid: %d\tgid: %d\n", e.UID, e.GID)
fmt.Fprintf(buf, " size: %d\tflags: %x\n", e.Size, 0)
return buf.String()
}
// Tree contains pre-computed hashes for trees that can be derived from the
// index. It helps speed up tree object generation from index for a new commit.
type Tree struct {
Entries []TreeEntry
}
// TreeEntry entry of a cached Tree
type TreeEntry struct {
// Path component (relative to its parent directory)
Path string
// Entries is the number of entries in the index that are covered by the
// tree this entry represents
Entries int
// Trees is the number of subtrees this tree has
Trees int
// Hash object name for the object that would result from writing this span
// of index as a tree.
Hash plumbing.Hash
}
// ResolveUndo is used when a conflict is resolved (e.g. with "git add
// path"): the higher stage entries are removed and a stage-0 entry with
// proper resolution is added. When those higher stage entries are removed,
// they are saved in the resolve undo extension.
type ResolveUndo struct {
Entries []ResolveUndoEntry
}
// ResolveUndoEntry contains the information about a conflict when it is
// resolved
type ResolveUndoEntry struct {
Path string
Stages map[Stage]plumbing.Hash
}
// EndOfIndexEntry is the 'End of Index Entry' (EOIE) extension, used to
// locate the end of the variable length index entries and the beginning of
// the extensions. Code can take advantage of this to quickly locate the
// index extensions without having to parse through all of the index entries.
//
// Because it must be able to be loaded before the variable length cache
// entries and other index extensions, this extension must be written last.
type EndOfIndexEntry struct {
// Offset to the end of the index entries
Offset uint32
// Hash is a SHA-1 over the extension types and their sizes (but not
// their contents).
Hash plumbing.Hash
}

View File

@ -0,0 +1,186 @@
package index
import (
"path/filepath"
"runtime"
"unicode/utf8"
)
// match is filepath.Match with support for matching full paths, not only
// filenames
// code from:
// https://github.com/golang/go/blob/39852bf4cce6927e01d0136c7843f65a801738cb/src/path/filepath/match.go#L44-L224
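// For example, match("foo/*", "foo/bar/baz") is true here, while
// filepath.Match returns false because its '*' never crosses a '/'.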
func match(pattern, name string) (matched bool, err error) {
Pattern:
for len(pattern) > 0 {
var star bool
var chunk string
star, chunk, pattern = scanChunk(pattern)
// Look for match at current position.
t, ok, err := matchChunk(chunk, name)
// if we're the last chunk, make sure we've exhausted the name
// otherwise we'll give a false result even if we could still match
// using the star
if ok && (len(t) == 0 || len(pattern) > 0) {
name = t
continue
}
if err != nil {
return false, err
}
if star {
// Look for match skipping i+1 bytes.
// Cannot skip /.
for i := 0; i < len(name); i++ {
t, ok, err := matchChunk(chunk, name[i+1:])
if ok {
// if we're the last chunk, make sure we exhausted the name
if len(pattern) == 0 && len(t) > 0 {
continue
}
name = t
continue Pattern
}
if err != nil {
return false, err
}
}
}
return false, nil
}
return len(name) == 0, nil
}
// scanChunk gets the next segment of pattern, which is a non-star string
// possibly preceded by a star.
func scanChunk(pattern string) (star bool, chunk, rest string) {
for len(pattern) > 0 && pattern[0] == '*' {
pattern = pattern[1:]
star = true
}
inrange := false
var i int
Scan:
for i = 0; i < len(pattern); i++ {
switch pattern[i] {
case '\\':
if runtime.GOOS != "windows" {
// error check handled in matchChunk: bad pattern.
if i+1 < len(pattern) {
i++
}
}
case '[':
inrange = true
case ']':
inrange = false
case '*':
if !inrange {
break Scan
}
}
}
return star, pattern[0:i], pattern[i:]
}
// matchChunk checks whether chunk matches the beginning of s.
// If so, it returns the remainder of s (after the match).
// Chunk is all single-character operators: literals, char classes, and ?.
func matchChunk(chunk, s string) (rest string, ok bool, err error) {
for len(chunk) > 0 {
if len(s) == 0 {
return
}
switch chunk[0] {
case '[':
// character class
r, n := utf8.DecodeRuneInString(s)
s = s[n:]
chunk = chunk[1:]
// We can't end right after '[', we're expecting at least
// a closing bracket and possibly a caret.
if len(chunk) == 0 {
err = filepath.ErrBadPattern
return
}
// possibly negated
negated := chunk[0] == '^'
if negated {
chunk = chunk[1:]
}
// parse all ranges
match := false
nrange := 0
for {
if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 {
chunk = chunk[1:]
break
}
var lo, hi rune
if lo, chunk, err = getEsc(chunk); err != nil {
return
}
hi = lo
if chunk[0] == '-' {
if hi, chunk, err = getEsc(chunk[1:]); err != nil {
return
}
}
if lo <= r && r <= hi {
match = true
}
nrange++
}
if match == negated {
return
}
case '?':
_, n := utf8.DecodeRuneInString(s)
s = s[n:]
chunk = chunk[1:]
case '\\':
if runtime.GOOS != "windows" {
chunk = chunk[1:]
if len(chunk) == 0 {
err = filepath.ErrBadPattern
return
}
}
fallthrough
default:
if chunk[0] != s[0] {
return
}
s = s[1:]
chunk = chunk[1:]
}
}
return s, true, nil
}
// getEsc gets a possibly-escaped character from chunk, for a character class.
func getEsc(chunk string) (r rune, nchunk string, err error) {
if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
err = filepath.ErrBadPattern
return
}
if chunk[0] == '\\' && runtime.GOOS != "windows" {
chunk = chunk[1:]
if len(chunk) == 0 {
err = filepath.ErrBadPattern
return
}
}
r, n := utf8.DecodeRuneInString(chunk)
if r == utf8.RuneError && n == 1 {
err = filepath.ErrBadPattern
}
nchunk = chunk[n:]
if len(nchunk) == 0 {
err = filepath.ErrBadPattern
}
return
}

View File

@ -0,0 +1,2 @@
// Package objfile implements encoding and decoding of object files.
package objfile

View File

@ -0,0 +1,114 @@
package objfile
import (
"compress/zlib"
"errors"
"io"
"strconv"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
)
var (
ErrClosed = errors.New("objfile: already closed")
ErrHeader = errors.New("objfile: invalid header")
ErrNegativeSize = errors.New("objfile: negative object size")
)
// Reader reads and decodes compressed objfile data from a provided io.Reader.
// Reader implements io.ReadCloser. Close should be called when finished with
// the Reader. Close will not close the underlying io.Reader.
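//
// Usage sketch (illustrative; error handling elided, r is assumed to be any
// io.Reader over objfile data):
//
//   or, _ := NewReader(r)
//   t, size, _ := or.Header()
//   data := make([]byte, size)
//   _, _ = io.ReadFull(or, data)
//   id := or.Hash() // object id for type t and the content read
//   _ = or.Close()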
type Reader struct {
multi io.Reader
zlib io.ReadCloser
hasher plumbing.Hasher
}
// NewReader returns a new Reader reading from r.
func NewReader(r io.Reader) (*Reader, error) {
zlib, err := zlib.NewReader(r)
if err != nil {
return nil, packfile.ErrZLib.AddDetails(err.Error())
}
return &Reader{
zlib: zlib,
}, nil
}
// Header reads the type and the size of the object, and prepares the reader
// to read the object's contents.
func (r *Reader) Header() (t plumbing.ObjectType, size int64, err error) {
var raw []byte
raw, err = r.readUntil(' ')
if err != nil {
return
}
t, err = plumbing.ParseObjectType(string(raw))
if err != nil {
return
}
raw, err = r.readUntil(0)
if err != nil {
return
}
size, err = strconv.ParseInt(string(raw), 10, 64)
if err != nil {
err = ErrHeader
return
}
defer r.prepareForRead(t, size)
return
}
// readUntil reads one byte at a time from r until it encounters delim or an
// error.
func (r *Reader) readUntil(delim byte) ([]byte, error) {
var buf [1]byte
value := make([]byte, 0, 16)
for {
if n, err := r.zlib.Read(buf[:]); err != nil && (err != io.EOF || n == 0) {
if err == io.EOF {
return nil, ErrHeader
}
return nil, err
}
if buf[0] == delim {
return value, nil
}
value = append(value, buf[0])
}
}
func (r *Reader) prepareForRead(t plumbing.ObjectType, size int64) {
r.hasher = plumbing.NewHasher(t, size)
r.multi = io.TeeReader(r.zlib, r.hasher)
}
// Read reads len(p) bytes into p from the object data stream. It returns
// the number of bytes read (0 <= n <= len(p)) and any error encountered. Even
// if Read returns n < len(p), it may use all of p as scratch space during the
// call.
//
// If Read encounters the end of the data stream it will return err == io.EOF,
// either in the current call if n > 0 or in a subsequent call.
func (r *Reader) Read(p []byte) (n int, err error) {
return r.multi.Read(p)
}
// Hash returns the hash of the object data stream that has been read so far.
func (r *Reader) Hash() plumbing.Hash {
return r.hasher.Sum()
}
// Close releases any resources consumed by the Reader. Calling Close does not
// close the wrapped io.Reader originally passed to NewReader.
func (r *Reader) Close() error {
return r.zlib.Close()
}

View File

@ -0,0 +1,109 @@
package objfile
import (
"compress/zlib"
"errors"
"io"
"strconv"
"gopkg.in/src-d/go-git.v4/plumbing"
)
var (
ErrOverflow = errors.New("objfile: declared data length exceeded (overflow)")
)
// Writer writes and encodes data in compressed objfile format to a provided
// io.Writer. Close should be called when finished with the Writer. Close will
// not close the underlying io.Writer.
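//
// Usage sketch (illustrative; error handling elided, w is assumed to be any
// io.Writer):
//
//   ow := NewWriter(w)
//   _ = ow.WriteHeader(plumbing.BlobObject, int64(len(content)))
//   _, _ = ow.Write(content)
//   id := ow.Hash() // object id of the written blob
//   _ = ow.Close()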
type Writer struct {
raw io.Writer
zlib io.WriteCloser
hasher plumbing.Hasher
multi io.Writer
closed bool
pending int64 // number of unwritten bytes
}
// NewWriter returns a new Writer writing to w.
//
// The returned Writer implements io.WriteCloser. Close should be called when
// finished with the Writer. Close will not close the underlying io.Writer.
func NewWriter(w io.Writer) *Writer {
return &Writer{
raw: w,
zlib: zlib.NewWriter(w),
}
}
// WriteHeader writes the type and the size and prepares to accept the object's
// contents. If an invalid t is provided, plumbing.ErrInvalidType is returned. If a
// negative size is provided, ErrNegativeSize is returned.
func (w *Writer) WriteHeader(t plumbing.ObjectType, size int64) error {
if !t.Valid() {
return plumbing.ErrInvalidType
}
if size < 0 {
return ErrNegativeSize
}
b := t.Bytes()
b = append(b, ' ')
b = append(b, []byte(strconv.FormatInt(size, 10))...)
b = append(b, 0)
defer w.prepareForWrite(t, size)
_, err := w.zlib.Write(b)
return err
}
func (w *Writer) prepareForWrite(t plumbing.ObjectType, size int64) {
w.pending = size
w.hasher = plumbing.NewHasher(t, size)
w.multi = io.MultiWriter(w.zlib, w.hasher)
}
// Write writes the object's contents. Write returns the error ErrOverflow if
// more than size bytes are written after WriteHeader.
func (w *Writer) Write(p []byte) (n int, err error) {
if w.closed {
return 0, ErrClosed
}
overwrite := false
if int64(len(p)) > w.pending {
p = p[0:w.pending]
overwrite = true
}
n, err = w.multi.Write(p)
w.pending -= int64(n)
if err == nil && overwrite {
err = ErrOverflow
return
}
return
}
// Hash returns the hash of the object data stream that has been written so far.
// It can be called before or after Close.
func (w *Writer) Hash() plumbing.Hash {
return w.hasher.Sum() // Not yet closed, return hash of data written so far
}
// Close releases any resources consumed by the Writer.
//
// Calling Close does not close the wrapped io.Writer originally passed to
// NewWriter.
func (w *Writer) Close() error {
if err := w.zlib.Close(); err != nil {
return err
}
w.closed = true
return nil
}

View File

@ -0,0 +1,68 @@
package packfile
import (
"bytes"
"io"
"sync"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
"gopkg.in/src-d/go-git.v4/utils/ioutil"
)
var signature = []byte{'P', 'A', 'C', 'K'}
const (
// VersionSupported is the packfile version supported by this package
VersionSupported uint32 = 2
firstLengthBits = uint8(4) // the first byte of the object header has 4 bits to store the length
lengthBits = uint8(7) // subsequent bytes have 7 bits to store the length
maskFirstLength = 15 // 0000 1111
maskContinue = 0x80 // 1000 0000
maskLength = uint8(127) // 0111 1111
maskType = uint8(112) // 0111 0000
)
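// Worked example of the object header encoding described by the constants
// above (illustrative, not code in this package): a blob (type 3) of size
// 300 has low length bits 1100 and remaining bits 10010, so its header is
// 0xbc (1 011 1100: continue, type 3, length 12) followed by 0x12
// (0 0010010: stop, length 18), decoding back to 12 + 18<<4 = 300.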
// UpdateObjectStorage updates the storer with the objects in the given
// packfile.
func UpdateObjectStorage(s storer.Storer, packfile io.Reader) error {
if pw, ok := s.(storer.PackfileWriter); ok {
return WritePackfileToObjectStorage(pw, packfile)
}
p, err := NewParserWithStorage(NewScanner(packfile), s)
if err != nil {
return err
}
_, err = p.Parse()
return err
}
// WritePackfileToObjectStorage writes all the packfile objects into the given
// object storage.
func WritePackfileToObjectStorage(
sw storer.PackfileWriter,
packfile io.Reader,
) (err error) {
w, err := sw.PackfileWriter()
if err != nil {
return err
}
defer ioutil.CheckClose(w, &err)
var n int64
n, err = io.Copy(w, packfile)
if err == nil && n == 0 {
return ErrEmptyPackfile
}
return err
}
var bufPool = sync.Pool{
New: func() interface{} {
return bytes.NewBuffer(nil)
},
}

View File

@ -0,0 +1,297 @@
package packfile
const blksz = 16
const maxChainLength = 64
// deltaIndex is a modified version of JGit's DeltaIndex adapted to our current
// design.
type deltaIndex struct {
table []int
entries []int
mask int
}
func (idx *deltaIndex) init(buf []byte) {
scanner := newDeltaIndexScanner(buf, len(buf))
idx.mask = scanner.mask
idx.table = scanner.table
idx.entries = make([]int, countEntries(scanner)+1)
idx.copyEntries(scanner)
}
// findMatch returns the offset of src where the block starting at tgtOffset
// is, and the length of the match. A length of 0 means there was no match. A
// length of -1 means the src length is lower than blksz; any other positive
// length is the length of the match in bytes.
func (idx *deltaIndex) findMatch(src, tgt []byte, tgtOffset int) (srcOffset, l int) {
if len(tgt) < tgtOffset+s {
return 0, len(tgt) - tgtOffset
}
if len(src) < blksz {
return 0, -1
}
if len(tgt) >= tgtOffset+s && len(src) >= blksz {
h := hashBlock(tgt, tgtOffset)
tIdx := h & idx.mask
eIdx := idx.table[tIdx]
if eIdx != 0 {
srcOffset = idx.entries[eIdx]
} else {
return
}
l = matchLength(src, tgt, tgtOffset, srcOffset)
}
return
}
func matchLength(src, tgt []byte, otgt, osrc int) (l int) {
lensrc := len(src)
lentgt := len(tgt)
for (osrc < lensrc && otgt < lentgt) && src[osrc] == tgt[otgt] {
l++
osrc++
otgt++
}
return
}
func countEntries(scan *deltaIndexScanner) (cnt int) {
// Figure out exactly how many entries we need. As we do the
// enumeration truncate any delta chains longer than what we
// are willing to scan during encode. This keeps the encode
// logic linear in the size of the input rather than quadratic.
for i := 0; i < len(scan.table); i++ {
h := scan.table[i]
if h == 0 {
continue
}
size := 0
for {
size++
if size == maxChainLength {
scan.next[h] = 0
break
}
h = scan.next[h]
if h == 0 {
break
}
}
cnt += size
}
return
}
func (idx *deltaIndex) copyEntries(scanner *deltaIndexScanner) {
// Rebuild the entries list from the scanner, positioning all
// blocks in the same hash chain next to each other. We can
// then later discard the next list, along with the scanner.
//
next := 1
for i := 0; i < len(idx.table); i++ {
h := idx.table[i]
if h == 0 {
continue
}
idx.table[i] = next
for {
idx.entries[next] = scanner.entries[h]
next++
h = scanner.next[h]
if h == 0 {
break
}
}
}
}
type deltaIndexScanner struct {
table []int
entries []int
next []int
mask int
count int
}
func newDeltaIndexScanner(buf []byte, size int) *deltaIndexScanner {
size -= size % blksz
worstCaseBlockCnt := size / blksz
if worstCaseBlockCnt < 1 {
return new(deltaIndexScanner)
}
tableSize := tableSize(worstCaseBlockCnt)
scanner := &deltaIndexScanner{
table: make([]int, tableSize),
mask: tableSize - 1,
entries: make([]int, worstCaseBlockCnt+1),
next: make([]int, worstCaseBlockCnt+1),
}
scanner.scan(buf, size)
return scanner
}
// slightly modified version of JGit's DeltaIndexScanner. We store the offset
// in the entries instead of the entries and the key, so we avoid operations
// to retrieve the offset later, as we don't use the key.
// See: https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaIndexScanner.java
func (s *deltaIndexScanner) scan(buf []byte, end int) {
lastHash := 0
ptr := end - blksz
for {
key := hashBlock(buf, ptr)
tIdx := key & s.mask
head := s.table[tIdx]
if head != 0 && lastHash == key {
s.entries[head] = ptr
} else {
s.count++
eIdx := s.count
s.entries[eIdx] = ptr
s.next[eIdx] = head
s.table[tIdx] = eIdx
}
lastHash = key
ptr -= blksz
if ptr < 0 {
break
}
}
}
func tableSize(worstCaseBlockCnt int) int {
shift := 32 - leadingZeros(uint32(worstCaseBlockCnt))
sz := 1 << uint(shift-1)
if sz < worstCaseBlockCnt {
sz <<= 1
}
return sz
}
// use https://golang.org/pkg/math/bits/#LeadingZeros32 in the future
func leadingZeros(x uint32) (n int) {
if x >= 1<<16 {
x >>= 16
n = 16
}
if x >= 1<<8 {
x >>= 8
n += 8
}
n += int(len8tab[x])
return 32 - n
}
var len8tab = [256]uint8{
0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
}
func hashBlock(raw []byte, ptr int) int {
// The first 4 steps collapse out into a 4 byte big-endian decode,
// with a larger right shift as we combined shift lefts together.
//
hash := ((uint32(raw[ptr]) & 0xff) << 24) |
((uint32(raw[ptr+1]) & 0xff) << 16) |
((uint32(raw[ptr+2]) & 0xff) << 8) |
(uint32(raw[ptr+3]) & 0xff)
hash ^= T[hash>>31]
hash = ((hash << 8) | (uint32(raw[ptr+4]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+5]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+6]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+7]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+8]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+9]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+10]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+11]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+12]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+13]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+14]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+15]) & 0xff)) ^ T[hash>>23]
return int(hash)
}
var T = []uint32{0x00000000, 0xd4c6b32d, 0x7d4bd577,
0xa98d665a, 0x2e5119c3, 0xfa97aaee, 0x531accb4, 0x87dc7f99,
0x5ca23386, 0x886480ab, 0x21e9e6f1, 0xf52f55dc, 0x72f32a45,
0xa6359968, 0x0fb8ff32, 0xdb7e4c1f, 0x6d82d421, 0xb944670c,
0x10c90156, 0xc40fb27b, 0x43d3cde2, 0x97157ecf, 0x3e981895,
0xea5eabb8, 0x3120e7a7, 0xe5e6548a, 0x4c6b32d0, 0x98ad81fd,
0x1f71fe64, 0xcbb74d49, 0x623a2b13, 0xb6fc983e, 0x0fc31b6f,
0xdb05a842, 0x7288ce18, 0xa64e7d35, 0x219202ac, 0xf554b181,
0x5cd9d7db, 0x881f64f6, 0x536128e9, 0x87a79bc4, 0x2e2afd9e,
0xfaec4eb3, 0x7d30312a, 0xa9f68207, 0x007be45d, 0xd4bd5770,
0x6241cf4e, 0xb6877c63, 0x1f0a1a39, 0xcbcca914, 0x4c10d68d,
0x98d665a0, 0x315b03fa, 0xe59db0d7, 0x3ee3fcc8, 0xea254fe5,
0x43a829bf, 0x976e9a92, 0x10b2e50b, 0xc4745626, 0x6df9307c,
0xb93f8351, 0x1f8636de, 0xcb4085f3, 0x62cde3a9, 0xb60b5084,
0x31d72f1d, 0xe5119c30, 0x4c9cfa6a, 0x985a4947, 0x43240558,
0x97e2b675, 0x3e6fd02f, 0xeaa96302, 0x6d751c9b, 0xb9b3afb6,
0x103ec9ec, 0xc4f87ac1, 0x7204e2ff, 0xa6c251d2, 0x0f4f3788,
0xdb8984a5, 0x5c55fb3c, 0x88934811, 0x211e2e4b, 0xf5d89d66,
0x2ea6d179, 0xfa606254, 0x53ed040e, 0x872bb723, 0x00f7c8ba,
0xd4317b97, 0x7dbc1dcd, 0xa97aaee0, 0x10452db1, 0xc4839e9c,
0x6d0ef8c6, 0xb9c84beb, 0x3e143472, 0xead2875f, 0x435fe105,
0x97995228, 0x4ce71e37, 0x9821ad1a, 0x31accb40, 0xe56a786d,
0x62b607f4, 0xb670b4d9, 0x1ffdd283, 0xcb3b61ae, 0x7dc7f990,
0xa9014abd, 0x008c2ce7, 0xd44a9fca, 0x5396e053, 0x8750537e,
0x2edd3524, 0xfa1b8609, 0x2165ca16, 0xf5a3793b, 0x5c2e1f61,
0x88e8ac4c, 0x0f34d3d5, 0xdbf260f8, 0x727f06a2, 0xa6b9b58f,
0x3f0c6dbc, 0xebcade91, 0x4247b8cb, 0x96810be6, 0x115d747f,
0xc59bc752, 0x6c16a108, 0xb8d01225, 0x63ae5e3a, 0xb768ed17,
0x1ee58b4d, 0xca233860, 0x4dff47f9, 0x9939f4d4, 0x30b4928e,
0xe47221a3, 0x528eb99d, 0x86480ab0, 0x2fc56cea, 0xfb03dfc7,
0x7cdfa05e, 0xa8191373, 0x01947529, 0xd552c604, 0x0e2c8a1b,
0xdaea3936, 0x73675f6c, 0xa7a1ec41, 0x207d93d8, 0xf4bb20f5,
0x5d3646af, 0x89f0f582, 0x30cf76d3, 0xe409c5fe, 0x4d84a3a4,
0x99421089, 0x1e9e6f10, 0xca58dc3d, 0x63d5ba67, 0xb713094a,
0x6c6d4555, 0xb8abf678, 0x11269022, 0xc5e0230f, 0x423c5c96,
0x96faefbb, 0x3f7789e1, 0xebb13acc, 0x5d4da2f2, 0x898b11df,
0x20067785, 0xf4c0c4a8, 0x731cbb31, 0xa7da081c, 0x0e576e46,
0xda91dd6b, 0x01ef9174, 0xd5292259, 0x7ca44403, 0xa862f72e,
0x2fbe88b7, 0xfb783b9a, 0x52f55dc0, 0x8633eeed, 0x208a5b62,
0xf44ce84f, 0x5dc18e15, 0x89073d38, 0x0edb42a1, 0xda1df18c,
0x739097d6, 0xa75624fb, 0x7c2868e4, 0xa8eedbc9, 0x0163bd93,
0xd5a50ebe, 0x52797127, 0x86bfc20a, 0x2f32a450, 0xfbf4177d,
0x4d088f43, 0x99ce3c6e, 0x30435a34, 0xe485e919, 0x63599680,
0xb79f25ad, 0x1e1243f7, 0xcad4f0da, 0x11aabcc5, 0xc56c0fe8,
0x6ce169b2, 0xb827da9f, 0x3ffba506, 0xeb3d162b, 0x42b07071,
0x9676c35c, 0x2f49400d, 0xfb8ff320, 0x5202957a, 0x86c42657,
0x011859ce, 0xd5deeae3, 0x7c538cb9, 0xa8953f94, 0x73eb738b,
0xa72dc0a6, 0x0ea0a6fc, 0xda6615d1, 0x5dba6a48, 0x897cd965,
0x20f1bf3f, 0xf4370c12, 0x42cb942c, 0x960d2701, 0x3f80415b,
0xeb46f276, 0x6c9a8def, 0xb85c3ec2, 0x11d15898, 0xc517ebb5,
0x1e69a7aa, 0xcaaf1487, 0x632272dd, 0xb7e4c1f0, 0x3038be69,
0xe4fe0d44, 0x4d736b1e, 0x99b5d833,
}

View File

@ -0,0 +1,369 @@
package packfile
import (
"sort"
"sync"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
const (
// maxDepth limits how many delta-on-delta steps we allow in a chain.
// 50 is the default value used in JGit.
maxDepth = int64(50)
)
// applyDelta is the set of object types to which we apply deltas
var applyDelta = map[plumbing.ObjectType]bool{
plumbing.BlobObject: true,
plumbing.TreeObject: true,
}
type deltaSelector struct {
storer storer.EncodedObjectStorer
}
func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector {
return &deltaSelector{s}
}
// ObjectsToPack creates a list of ObjectToPack from the hashes provided,
// creating deltas when suitable, using specific internal logic. `packWindow`
// specifies the size of the sliding window used to compare objects for delta
// compression; 0 turns off delta compression entirely.
func (dw *deltaSelector) ObjectsToPack(
hashes []plumbing.Hash,
packWindow uint,
) ([]*ObjectToPack, error) {
otp, err := dw.objectsToPack(hashes, packWindow)
if err != nil {
return nil, err
}
if packWindow == 0 {
return otp, nil
}
dw.sort(otp)
var objectGroups [][]*ObjectToPack
var prev *ObjectToPack
i := -1
for _, obj := range otp {
if prev == nil || prev.Type() != obj.Type() {
objectGroups = append(objectGroups, []*ObjectToPack{obj})
i++
prev = obj
} else {
objectGroups[i] = append(objectGroups[i], obj)
}
}
var wg sync.WaitGroup
var once sync.Once
for _, objs := range objectGroups {
objs := objs
wg.Add(1)
go func() {
if walkErr := dw.walk(objs, packWindow); walkErr != nil {
once.Do(func() {
err = walkErr
})
}
wg.Done()
}()
}
wg.Wait()
if err != nil {
return nil, err
}
return otp, nil
}
func (dw *deltaSelector) objectsToPack(
hashes []plumbing.Hash,
packWindow uint,
) ([]*ObjectToPack, error) {
var objectsToPack []*ObjectToPack
for _, h := range hashes {
var o plumbing.EncodedObject
var err error
if packWindow == 0 {
o, err = dw.encodedObject(h)
} else {
o, err = dw.encodedDeltaObject(h)
}
if err != nil {
return nil, err
}
otp := newObjectToPack(o)
if _, ok := o.(plumbing.DeltaObject); ok {
otp.CleanOriginal()
}
objectsToPack = append(objectsToPack, otp)
}
if packWindow == 0 {
return objectsToPack, nil
}
if err := dw.fixAndBreakChains(objectsToPack); err != nil {
return nil, err
}
return objectsToPack, nil
}
func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
edos, ok := dw.storer.(storer.DeltaObjectStorer)
if !ok {
return dw.encodedObject(h)
}
return edos.DeltaObject(plumbing.AnyObject, h)
}
func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
return dw.storer.EncodedObject(plumbing.AnyObject, h)
}
func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error {
m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack))
for _, otp := range objectsToPack {
m[otp.Hash()] = otp
}
for _, otp := range objectsToPack {
if err := dw.fixAndBreakChainsOne(m, otp); err != nil {
return err
}
}
return nil
}
func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error {
if !otp.Object.Type().IsDelta() {
return nil
}
// Initial ObjectToPack instances might have a delta assigned to Object
// but no actual base initially. Once Base is assigned to a delta, it means
// we already fixed it.
if otp.Base != nil {
return nil
}
do, ok := otp.Object.(plumbing.DeltaObject)
if !ok {
// if this is not a DeltaObject, then we cannot retrieve its base,
// so we have to break the delta chain here.
return dw.undeltify(otp)
}
base, ok := objectsToPack[do.BaseHash()]
if !ok {
// The base of the delta is not in our list of objects to pack, so
// we break the chain.
return dw.undeltify(otp)
}
if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
return err
}
otp.SetDelta(base, otp.Object)
return nil
}
func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error {
if otp.Original != nil {
return nil
}
if !otp.Object.Type().IsDelta() {
return nil
}
obj, err := dw.encodedObject(otp.Hash())
if err != nil {
return err
}
otp.SetOriginal(obj)
return nil
}
// undeltify undeltifies an *ObjectToPack by retrieving the original object from
// the storer and resetting it.
func (dw *deltaSelector) undeltify(otp *ObjectToPack) error {
if err := dw.restoreOriginal(otp); err != nil {
return err
}
otp.Object = otp.Original
otp.Depth = 0
return nil
}
func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
sort.Sort(byTypeAndSize(objectsToPack))
}
func (dw *deltaSelector) walk(
objectsToPack []*ObjectToPack,
packWindow uint,
) error {
indexMap := make(map[plumbing.Hash]*deltaIndex)
for i := 0; i < len(objectsToPack); i++ {
// Clean up the index map and reconstructed delta objects for anything
// outside our pack window, to save memory.
if i > int(packWindow) {
obj := objectsToPack[i-int(packWindow)]
delete(indexMap, obj.Hash())
if obj.IsDelta() {
obj.SaveOriginalMetadata()
obj.CleanOriginal()
}
}
target := objectsToPack[i]
// If we already have a delta, we don't try to find a new one for this
// object. This happens when a delta is set to be reused from an existing
// packfile.
if target.IsDelta() {
continue
}
// We only want to create deltas from specific types.
if !applyDelta[target.Type()] {
continue
}
for j := i - 1; j >= 0 && i-j < int(packWindow); j-- {
base := objectsToPack[j]
// Objects must use only the same type as their delta base.
// Since objectsToPack is sorted by type and size, once we find
// a different type, we know we won't find more of them.
if base.Type() != target.Type() {
break
}
if err := dw.tryToDeltify(indexMap, base, target); err != nil {
return err
}
}
}
return nil
}
func (dw *deltaSelector) tryToDeltify(indexMap map[plumbing.Hash]*deltaIndex, base, target *ObjectToPack) error {
// Original object might not be present if we're reusing a delta, so we
// ensure it is restored.
if err := dw.restoreOriginal(target); err != nil {
return err
}
if err := dw.restoreOriginal(base); err != nil {
return err
}
// If the sizes are radically different, this is a bad pairing.
if target.Size() < base.Size()>>4 {
return nil
}
msz := dw.deltaSizeLimit(
target.Object.Size(),
base.Depth,
target.Depth,
target.IsDelta(),
)
// Nearly impossible to fit useful delta.
if msz <= 8 {
return nil
}
// If we have to insert a lot to make this work, find another.
if base.Size()-target.Size() > msz {
return nil
}
if _, ok := indexMap[base.Hash()]; !ok {
indexMap[base.Hash()] = new(deltaIndex)
}
// Now we can generate the delta using originals
delta, err := getDelta(indexMap[base.Hash()], base.Original, target.Original)
if err != nil {
return err
}
// use the delta only if it is smaller than the size limit
if delta.Size() < msz {
target.SetDelta(base, delta)
}
return nil
}
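// deltaSizeLimit returns the maximum size a delta may have to be considered
// worthwhile. For example, with maxDepth = 50 and a non-delta target of
// 1000 bytes, a delta against a depth-0 base may be at most 500 bytes,
// while one against a depth-45 base may be at most 500 * (50-45)/50 = 50
// bytes.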
func (dw *deltaSelector) deltaSizeLimit(targetSize int64, baseDepth int,
targetDepth int, targetDelta bool) int64 {
if !targetDelta {
// Any delta should be no more than 50% of the original size
// (for text files deflate of whole form should shrink 50%).
n := targetSize >> 1
// Evenly distribute delta size limits over allowed depth.
// If src is non-delta (depth = 0), delta <= 50% of original.
// If src is almost at limit (9/10), delta <= 10% of original.
return n * (maxDepth - int64(baseDepth)) / maxDepth
}
// With a delta base chosen any new delta must be "better".
// Retain the distribution described above.
d := int64(targetDepth)
n := targetSize
// If the target depth is at or beyond maxDepth, this delta is not suitable to be used.
if d >= maxDepth {
return 0
}
// If src is whole (depth=0) and base is near limit (depth=9/10)
// any delta using src can be 10x larger and still be better.
//
// If src is near limit (depth=9/10) and base is whole (depth=0)
// a new delta dependent on src must be 1/10th the size.
return n * (maxDepth - int64(baseDepth)) / (maxDepth - d)
}
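
To make the budget arithmetic above concrete, here is a minimal standalone sketch. It assumes maxDepth is 50 (the real constant is defined elsewhere in this package) and inlines the non-delta branch of deltaSizeLimit:

package main

import "fmt"

func main() {
	const maxDepth = int64(50) // assumed value; the real constant lives elsewhere in this package
	targetSize := int64(10240) // a 10 KiB non-delta target
	n := targetSize >> 1       // a delta may use at most 50% of the original size

	// With a base at depth 0 the full budget is available.
	fmt.Println(n * (maxDepth - 0) / maxDepth) // 5120

	// With a base at depth 45 only a tenth of the budget remains.
	fmt.Println(n * (maxDepth - 45) / maxDepth) // 512
}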
type byTypeAndSize []*ObjectToPack
func (a byTypeAndSize) Len() int { return len(a) }
func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byTypeAndSize) Less(i, j int) bool {
if a[i].Type() < a[j].Type() {
return false
}
if a[i].Type() > a[j].Type() {
return true
}
return a[i].Size() > a[j].Size()
}

View File

@ -0,0 +1,201 @@
package packfile
import (
"bytes"
"gopkg.in/src-d/go-git.v4/plumbing"
)
// See https://github.com/jelmer/dulwich/blob/master/dulwich/pack.py and
// https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js
// for more info
const (
// Standard chunk size used to generate fingerprints
s = 16
// https://github.com/git/git/blob/f7466e94375b3be27f229c78873f0acf8301c0a5/diff-delta.c#L428
// Max size of a copy operation (64KB)
maxCopySize = 64 * 1024
)
// GetDelta returns an EncodedObject of type OFSDeltaObject. Both the base and
// the target object will be loaded into memory to create the delta object.
// To regenerate the target, you will need the resulting delta object and the base one.
// An error will be returned if the base or target object cannot be read.
func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) {
return getDelta(new(deltaIndex), base, target)
}
func getDelta(index *deltaIndex, base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) {
br, err := base.Reader()
if err != nil {
return nil, err
}
defer br.Close()
tr, err := target.Reader()
if err != nil {
return nil, err
}
defer tr.Close()
bb := bufPool.Get().(*bytes.Buffer)
bb.Reset()
defer bufPool.Put(bb)
_, err = bb.ReadFrom(br)
if err != nil {
return nil, err
}
tb := bufPool.Get().(*bytes.Buffer)
tb.Reset()
defer bufPool.Put(tb)
_, err = tb.ReadFrom(tr)
if err != nil {
return nil, err
}
db := diffDelta(index, bb.Bytes(), tb.Bytes())
delta := &plumbing.MemoryObject{}
_, err = delta.Write(db)
if err != nil {
return nil, err
}
delta.SetSize(int64(len(db)))
delta.SetType(plumbing.OFSDeltaObject)
return delta, nil
}
// DiffDelta returns the delta that transforms src into tgt.
func DiffDelta(src, tgt []byte) []byte {
return diffDelta(new(deltaIndex), src, tgt)
}
func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte {
buf := bufPool.Get().(*bytes.Buffer)
buf.Reset()
buf.Write(deltaEncodeSize(len(src)))
buf.Write(deltaEncodeSize(len(tgt)))
if len(index.entries) == 0 {
index.init(src)
}
ibuf := bufPool.Get().(*bytes.Buffer)
ibuf.Reset()
for i := 0; i < len(tgt); i++ {
offset, l := index.findMatch(src, tgt, i)
if l == 0 {
// couldn't find a match, just write the current byte and continue
ibuf.WriteByte(tgt[i])
} else if l < 0 {
// src is less than blksz, copy the rest of the target to avoid
// calls to findMatch
for ; i < len(tgt); i++ {
ibuf.WriteByte(tgt[i])
}
} else if l < s {
// remaining target is less than blksz, copy what's left of it
// and avoid calls to findMatch
for j := i; j < i+l; j++ {
ibuf.WriteByte(tgt[j])
}
i += l - 1
} else {
encodeInsertOperation(ibuf, buf)
rl := l
aOffset := offset
for rl > 0 {
if rl < maxCopySize {
buf.Write(encodeCopyOperation(aOffset, rl))
break
}
buf.Write(encodeCopyOperation(aOffset, maxCopySize))
rl -= maxCopySize
aOffset += maxCopySize
}
i += l - 1
}
}
encodeInsertOperation(ibuf, buf)
bytes := buf.Bytes()
bufPool.Put(buf)
bufPool.Put(ibuf)
return bytes
}
func encodeInsertOperation(ibuf, buf *bytes.Buffer) {
if ibuf.Len() == 0 {
return
}
b := ibuf.Bytes()
s := ibuf.Len()
o := 0
for {
if s <= 127 {
break
}
buf.WriteByte(byte(127))
buf.Write(b[o : o+127])
s -= 127
o += 127
}
buf.WriteByte(byte(s))
buf.Write(b[o : o+s])
ibuf.Reset()
}
func deltaEncodeSize(size int) []byte {
var ret []byte
c := size & 0x7f
size >>= 7
for {
if size == 0 {
break
}
ret = append(ret, byte(c|0x80))
c = size & 0x7f
size >>= 7
}
ret = append(ret, byte(c))
return ret
}
func encodeCopyOperation(offset, length int) []byte {
code := 0x80
var opcodes []byte
var i uint
for i = 0; i < 4; i++ {
f := 0xff << (i * 8)
if offset&f != 0 {
opcodes = append(opcodes, byte(offset&f>>(i*8)))
code |= 0x01 << i
}
}
for i = 0; i < 3; i++ {
f := 0xff << (i * 8)
if length&f != 0 {
opcodes = append(opcodes, byte(length&f>>(i*8)))
code |= 0x10 << i
}
}
return append([]byte{byte(code)}, opcodes...)
}
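
A round trip through DiffDelta and PatchDelta (defined later in this package) is the quickest way to see these encodings in action; a minimal usage sketch:

package main

import (
	"bytes"
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
)

func main() {
	src := []byte("the quick brown fox jumps over the lazy dog\n")
	tgt := []byte("the quick brown fox jumps over the sleeping dog\n")

	// Compute a git-format delta that transforms src into tgt.
	delta := packfile.DiffDelta(src, tgt)
	fmt.Printf("delta is %d bytes\n", len(delta))

	// Applying the delta to src must reproduce tgt exactly.
	out, err := packfile.PatchDelta(src, delta)
	if err != nil {
		panic(err)
	}
	fmt.Println(bytes.Equal(out, tgt)) // true
}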

View File

@ -0,0 +1,39 @@
// Package packfile implements encoding and decoding of packfile format.
//
// == pack-*.pack files have the following format:
//
// - A header appears at the beginning and consists of the following:
//
// 4-byte signature:
// The signature is: {'P', 'A', 'C', 'K'}
//
// 4-byte version number (network byte order):
// GIT currently accepts version number 2 or 3 but
// generates version 2 only.
//
// 4-byte number of objects contained in the pack (network byte order)
//
// Observation: we cannot have more than 4G versions ;-) and
// more than 4G objects in a pack.
//
// - The header is followed by number of object entries, each of
// which looks like this:
//
// (undeltified representation)
// n-byte type and length (3-bit type, (n-1)*7+4-bit length)
// compressed data
//
// (deltified representation)
// n-byte type and length (3-bit type, (n-1)*7+4-bit length)
// 20-byte base object name
// compressed delta data
//
// Observation: length of each object is encoded in a variable
// length format and is not constrained to 32-bit or anything.
//
// - The trailer records 20-byte SHA1 checksum of all of the above.
//
//
// Source:
// https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt
package packfile
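
As a sanity check of the header layout described above, the following standalone sketch parses the 12-byte header by hand using only the standard library:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	// A header for an empty, version-2 packfile: "PACK", version 2, 0 objects.
	hdr := []byte{'P', 'A', 'C', 'K', 0, 0, 0, 2, 0, 0, 0, 0}

	if !bytes.Equal(hdr[:4], []byte("PACK")) {
		panic("bad signature")
	}
	version := binary.BigEndian.Uint32(hdr[4:8])  // network byte order
	objects := binary.BigEndian.Uint32(hdr[8:12]) // network byte order
	fmt.Printf("version %d, %d objects\n", version, objects)
}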

View File

@ -0,0 +1,219 @@
package packfile
import (
"compress/zlib"
"crypto/sha1"
"fmt"
"io"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
// Encoder gets the data from the storage and writes it to the writer in PACK
// format
type Encoder struct {
selector *deltaSelector
w *offsetWriter
zw *zlib.Writer
hasher plumbing.Hasher
useRefDeltas bool
}
// NewEncoder creates a new packfile encoder using a specific Writer and
// EncodedObjectStorer. By default deltas used to generate the packfile will be
// OFSDeltaObject. To use Reference deltas, set useRefDeltas to true.
func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *Encoder {
h := plumbing.Hasher{
Hash: sha1.New(),
}
mw := io.MultiWriter(w, h)
ow := newOffsetWriter(mw)
zw := zlib.NewWriter(mw)
return &Encoder{
selector: newDeltaSelector(s),
w: ow,
zw: zw,
hasher: h,
useRefDeltas: useRefDeltas,
}
}
// Encode creates a packfile containing all the objects referenced in
// hashes and writes it to the writer in the Encoder. `packWindow`
// specifies the size of the sliding window used to compare objects
// for delta compression; 0 turns off delta compression entirely.
func (e *Encoder) Encode(
hashes []plumbing.Hash,
packWindow uint,
) (plumbing.Hash, error) {
objects, err := e.selector.ObjectsToPack(hashes, packWindow)
if err != nil {
return plumbing.ZeroHash, err
}
return e.encode(objects)
}
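
A usage sketch for the encoder; it assumes the objects already live in the in-memory storer from gopkg.in/src-d/go-git.v4/storage/memory, and the blob content is of course hypothetical:

package main

import (
	"bytes"
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
	"gopkg.in/src-d/go-git.v4/storage/memory"
)

func main() {
	st := memory.NewStorage()

	// Store a blob so there is something to pack (hypothetical content).
	obj := st.NewEncodedObject()
	obj.SetType(plumbing.BlobObject)
	w, _ := obj.Writer()
	w.Write([]byte("hello packfile\n"))
	w.Close()
	h, _ := st.SetEncodedObject(obj)

	var buf bytes.Buffer
	e := packfile.NewEncoder(&buf, st, false)    // false: use OFS deltas
	sum, err := e.Encode([]plumbing.Hash{h}, 10) // sliding window of 10 objects
	if err != nil {
		panic(err)
	}
	fmt.Printf("packfile %s, %d bytes\n", sum, buf.Len())
}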
func (e *Encoder) encode(objects []*ObjectToPack) (plumbing.Hash, error) {
if err := e.head(len(objects)); err != nil {
return plumbing.ZeroHash, err
}
for _, o := range objects {
if err := e.entry(o); err != nil {
return plumbing.ZeroHash, err
}
}
return e.footer()
}
func (e *Encoder) head(numEntries int) error {
return binary.Write(
e.w,
signature,
int32(VersionSupported),
int32(numEntries),
)
}
func (e *Encoder) entry(o *ObjectToPack) error {
if o.WantWrite() {
// A cycle exists in this delta chain. This should only occur if a
// selected object representation disappeared during writing
// (for example due to a concurrent repack) and a different base
// was chosen, forcing a cycle. Select something other than a
// delta, and write this object.
e.selector.restoreOriginal(o)
o.BackToOriginal()
}
if o.IsWritten() {
return nil
}
o.MarkWantWrite()
if err := e.writeBaseIfDelta(o); err != nil {
return err
}
// We need to check if we already wrote this object due to a cyclic delta chain
if o.IsWritten() {
return nil
}
o.Offset = e.w.Offset()
if o.IsDelta() {
if err := e.writeDeltaHeader(o); err != nil {
return err
}
} else {
if err := e.entryHead(o.Type(), o.Size()); err != nil {
return err
}
}
e.zw.Reset(e.w)
or, err := o.Object.Reader()
if err != nil {
return err
}
_, err = io.Copy(e.zw, or)
if err != nil {
return err
}
return e.zw.Close()
}
func (e *Encoder) writeBaseIfDelta(o *ObjectToPack) error {
if o.IsDelta() && !o.Base.IsWritten() {
// We must write base first
return e.entry(o.Base)
}
return nil
}
func (e *Encoder) writeDeltaHeader(o *ObjectToPack) error {
// Write offset deltas by default
t := plumbing.OFSDeltaObject
if e.useRefDeltas {
t = plumbing.REFDeltaObject
}
if err := e.entryHead(t, o.Object.Size()); err != nil {
return err
}
if e.useRefDeltas {
return e.writeRefDeltaHeader(o.Base.Hash())
} else {
return e.writeOfsDeltaHeader(o)
}
}
func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
return binary.Write(e.w, base)
}
func (e *Encoder) writeOfsDeltaHeader(o *ObjectToPack) error {
// for OFS_DELTA, offset of the base is interpreted as negative offset
// relative to the type-byte of the header of the ofs-delta entry.
relativeOffset := o.Offset - o.Base.Offset
if relativeOffset <= 0 {
return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
}
return binary.WriteVariableWidthInt(e.w, relativeOffset)
}
func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
t := int64(typeNum)
header := []byte{}
c := (t << firstLengthBits) | (size & maskFirstLength)
size >>= firstLengthBits
for {
if size == 0 {
break
}
header = append(header, byte(c|maskContinue))
c = size & int64(maskLength)
size >>= lengthBits
}
header = append(header, byte(c))
_, err := e.w.Write(header)
return err
}
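
Tracing entryHead by hand helps: below is a standalone sketch encoding the head for a blob (type 3) of 100 bytes. The mask and shift constants are inlined as assumptions, since the real ones are defined elsewhere in this package:

package main

import "fmt"

func main() {
	const (
		firstLengthBits = 4 // assumed values; the real constants live elsewhere in this package
		lengthBits      = 7
		maskFirstLength = 0x0f
		maskLength      = 0x7f
		maskContinue    = 0x80
	)

	t, size := int64(3), int64(100) // blob, 100 bytes

	var header []byte
	c := (t << firstLengthBits) | (size & maskFirstLength)
	size >>= firstLengthBits
	for size != 0 {
		header = append(header, byte(c|maskContinue))
		c = size & maskLength
		size >>= lengthBits
	}
	header = append(header, byte(c))

	// b4 06: 1011_0100 = continue bit, type 3, low 4 size bits = 4; then 6<<4 + 4 = 100.
	fmt.Printf("% x\n", header)
}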
func (e *Encoder) footer() (plumbing.Hash, error) {
h := e.hasher.Sum()
return h, binary.Write(e.w, h)
}
type offsetWriter struct {
w io.Writer
offset int64
}
func newOffsetWriter(w io.Writer) *offsetWriter {
return &offsetWriter{w: w}
}
func (ow *offsetWriter) Write(p []byte) (n int, err error) {
n, err = ow.w.Write(p)
ow.offset += int64(n)
return n, err
}
func (ow *offsetWriter) Offset() int64 {
return ow.offset
}

View File

@ -0,0 +1,30 @@
package packfile
import "fmt"
// Error specifies errors returned during packfile parsing.
type Error struct {
reason, details string
}
// NewError returns a new error.
func NewError(reason string) *Error {
return &Error{reason: reason}
}
// Error returns a text representation of the error.
func (e *Error) Error() string {
if e.details == "" {
return e.reason
}
return fmt.Sprintf("%s: %s", e.reason, e.details)
}
// AddDetails returns a new error with the formatted details appended to the reason.
func (e *Error) AddDetails(format string, args ...interface{}) *Error {
return &Error{
reason: e.reason,
details: fmt.Sprintf(format, args...),
}
}
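
A short usage sketch showing how reason and details combine:

package main

import (
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
)

func main() {
	base := packfile.NewError("invalid git object")
	err := base.AddDetails("type %q", "tree-ish")
	fmt.Println(err) // invalid git object: type "tree-ish"
}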

View File

@ -0,0 +1,116 @@
package packfile
import (
"io"
billy "gopkg.in/src-d/go-billy.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)
// FSObject is an object from the packfile on the filesystem.
type FSObject struct {
hash plumbing.Hash
h *ObjectHeader
offset int64
size int64
typ plumbing.ObjectType
index idxfile.Index
fs billy.Filesystem
path string
cache cache.Object
}
// NewFSObject creates a new filesystem object.
func NewFSObject(
hash plumbing.Hash,
finalType plumbing.ObjectType,
offset int64,
contentSize int64,
index idxfile.Index,
fs billy.Filesystem,
path string,
cache cache.Object,
) *FSObject {
return &FSObject{
hash: hash,
offset: offset,
size: contentSize,
typ: finalType,
index: index,
fs: fs,
path: path,
cache: cache,
}
}
// Reader implements the plumbing.EncodedObject interface.
func (o *FSObject) Reader() (io.ReadCloser, error) {
obj, ok := o.cache.Get(o.hash)
if ok && obj != o {
reader, err := obj.Reader()
if err != nil {
return nil, err
}
return reader, nil
}
f, err := o.fs.Open(o.path)
if err != nil {
return nil, err
}
p := NewPackfileWithCache(o.index, nil, f, o.cache)
r, err := p.getObjectContent(o.offset)
if err != nil {
_ = f.Close()
return nil, err
}
if err := f.Close(); err != nil {
return nil, err
}
return r, nil
}
// SetSize implements the plumbing.EncodedObject interface. This method
// is a noop.
func (o *FSObject) SetSize(int64) {}
// SetType implements the plumbing.EncodedObject interface. This method is
// a noop.
func (o *FSObject) SetType(plumbing.ObjectType) {}
// Hash implements the plumbing.EncodedObject interface.
func (o *FSObject) Hash() plumbing.Hash { return o.hash }
// Size implements the plumbing.EncodedObject interface.
func (o *FSObject) Size() int64 { return o.size }
// Type implements the plumbing.EncodedObject interface.
func (o *FSObject) Type() plumbing.ObjectType {
return o.typ
}
// Writer implements the plumbing.EncodedObject interface. This method always
// returns a nil writer.
func (o *FSObject) Writer() (io.WriteCloser, error) {
return nil, nil
}
type objectReader struct {
io.ReadCloser
f billy.File
}
func (r *objectReader) Close() error {
if err := r.ReadCloser.Close(); err != nil {
_ = r.f.Close()
return err
}
return r.f.Close()
}

View File

@ -0,0 +1,164 @@
package packfile
import (
"gopkg.in/src-d/go-git.v4/plumbing"
)
// ObjectToPack is a representation of an object that is going to be written
// into a packfile.
type ObjectToPack struct {
// The main object to pack, it could be any object, including deltas
Object plumbing.EncodedObject
// Base is the object that a delta is based on (which could also be another delta).
// If the main object is not a delta, Base will be nil
Base *ObjectToPack
// Original is the object that we can generate by applying the delta to
// Base, or the same object as Object in the case of a non-delta
// object.
Original plumbing.EncodedObject
// Depth is the number of deltas that must be resolved to obtain Original
// (delta based on delta based on ...)
Depth int
// Offset is the offset in the pack once the object has been written, or 0
// if it has not been written yet (1 is reserved as the WantWrite marker)
Offset int64
// Information from the original object
resolvedOriginal bool
originalType plumbing.ObjectType
originalSize int64
originalHash plumbing.Hash
}
// newObjectToPack creates a correct ObjectToPack based on a non-delta object
func newObjectToPack(o plumbing.EncodedObject) *ObjectToPack {
return &ObjectToPack{
Object: o,
Original: o,
}
}
// newDeltaObjectToPack creates a correct ObjectToPack for a delta object, based on
// its base (which could be another delta), the delta target (called original here),
// and the delta Object itself
func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedObject) *ObjectToPack {
return &ObjectToPack{
Object: delta,
Base: base,
Original: original,
Depth: base.Depth + 1,
}
}
// BackToOriginal converts this ObjectToPack back to a non-deltified object if it was one
func (o *ObjectToPack) BackToOriginal() {
if o.IsDelta() && o.Original != nil {
o.Object = o.Original
o.Base = nil
o.Depth = 0
}
}
// IsWritten reports whether this ObjectToPack has already
// been written into the packfile
func (o *ObjectToPack) IsWritten() bool {
return o.Offset > 1
}
// MarkWantWrite marks this ObjectToPack as WantWrite
// to avoid delta chain loops
func (o *ObjectToPack) MarkWantWrite() {
o.Offset = 1
}
// WantWrite checks if this ObjectToPack was marked as WantWrite before
func (o *ObjectToPack) WantWrite() bool {
return o.Offset == 1
}
// SetOriginal sets Original and saves its size, type and hash. If obj
// is nil, Original is set but the previously resolved values are kept
func (o *ObjectToPack) SetOriginal(obj plumbing.EncodedObject) {
o.Original = obj
o.SaveOriginalMetadata()
}
// SaveOriginalMetadata saves size, type and hash of Original object
func (o *ObjectToPack) SaveOriginalMetadata() {
if o.Original != nil {
o.originalSize = o.Original.Size()
o.originalType = o.Original.Type()
o.originalHash = o.Original.Hash()
o.resolvedOriginal = true
}
}
// CleanOriginal sets Original to nil
func (o *ObjectToPack) CleanOriginal() {
o.Original = nil
}
func (o *ObjectToPack) Type() plumbing.ObjectType {
if o.Original != nil {
return o.Original.Type()
}
if o.resolvedOriginal {
return o.originalType
}
if o.Base != nil {
return o.Base.Type()
}
if o.Object != nil {
return o.Object.Type()
}
panic("cannot get type")
}
func (o *ObjectToPack) Hash() plumbing.Hash {
if o.Original != nil {
return o.Original.Hash()
}
if o.resolvedOriginal {
return o.originalHash
}
do, ok := o.Object.(plumbing.DeltaObject)
if ok {
return do.ActualHash()
}
panic("cannot get hash")
}
func (o *ObjectToPack) Size() int64 {
if o.Original != nil {
return o.Original.Size()
}
if o.resolvedOriginal {
return o.originalSize
}
do, ok := o.Object.(plumbing.DeltaObject)
if ok {
return do.ActualSize()
}
panic("cannot get ObjectToPack size")
}
func (o *ObjectToPack) IsDelta() bool {
return o.Base != nil
}
func (o *ObjectToPack) SetDelta(base *ObjectToPack, delta plumbing.EncodedObject) {
o.Object = delta
o.Base = base
o.Depth = base.Depth + 1
}

View File

@ -0,0 +1,487 @@
package packfile
import (
"bytes"
"io"
"os"
billy "gopkg.in/src-d/go-billy.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
var (
// ErrInvalidObject is returned by Decode when an invalid object is
// found in the packfile.
ErrInvalidObject = NewError("invalid git object")
// ErrZLib is returned by Decode when there was an error unzipping
// the packfile contents.
ErrZLib = NewError("zlib reading error")
)
// When reading small objects from packfile it is beneficial to do so at
// once to exploit the buffered I/O. In many cases the objects are so small
// that they were already loaded to memory when the object header was
// loaded from the packfile. Wrapping in FSObject would cause this buffered
// data to be thrown away and then re-read later, with the additional
// seeking causing reloads from disk. Objects smaller than this threshold
// are now always read into memory and stored in cache instead of being
// wrapped in FSObject.
const smallObjectThreshold = 16 * 1024
// Packfile allows retrieving information from inside a packfile.
type Packfile struct {
idxfile.Index
fs billy.Filesystem
file billy.File
s *Scanner
deltaBaseCache cache.Object
offsetToType map[int64]plumbing.ObjectType
}
// NewPackfileWithCache creates a new Packfile with the given object cache.
// If the filesystem is provided, the packfile will return FSObjects, otherwise
// it will return MemoryObjects.
func NewPackfileWithCache(
index idxfile.Index,
fs billy.Filesystem,
file billy.File,
cache cache.Object,
) *Packfile {
s := NewScanner(file)
return &Packfile{
index,
fs,
file,
s,
cache,
make(map[int64]plumbing.ObjectType),
}
}
// NewPackfile returns a packfile representation for the given packfile file
// and packfile idx.
// If the filesystem is provided, the packfile will return FSObjects, otherwise
// it will return MemoryObjects.
func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File) *Packfile {
return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault())
}
// Get retrieves the encoded object in the packfile with the given hash.
func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) {
offset, err := p.FindOffset(h)
if err != nil {
return nil, err
}
return p.GetByOffset(offset)
}
// GetByOffset retrieves the encoded object from the packfile with the given
// offset.
func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) {
hash, err := p.FindHash(o)
if err == nil {
if obj, ok := p.deltaBaseCache.Get(hash); ok {
return obj, nil
}
}
return p.objectAtOffset(o)
}
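
A usage sketch that opens a pack and its index from disk and iterates all objects; the directory and file names are hypothetical placeholders:

package main

import (
	"fmt"

	"gopkg.in/src-d/go-billy.v4/osfs"
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
)

func main() {
	fs := osfs.New(".git/objects/pack") // hypothetical location

	idxf, err := fs.Open("pack-example.idx") // hypothetical file name
	if err != nil {
		panic(err)
	}
	idx := idxfile.NewMemoryIndex()
	if err := idxfile.NewDecoder(idxf).Decode(idx); err != nil {
		panic(err)
	}
	idxf.Close()

	packf, err := fs.Open("pack-example.pack") // hypothetical file name
	if err != nil {
		panic(err)
	}
	p := packfile.NewPackfile(idx, fs, packf)
	defer p.Close()

	iter, err := p.GetAll()
	if err != nil {
		panic(err)
	}
	err = iter.ForEach(func(obj plumbing.EncodedObject) error {
		fmt.Println(obj.Hash(), obj.Type(), obj.Size())
		return nil
	})
	if err != nil {
		panic(err)
	}
}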
// GetSizeByOffset retrieves the size of the encoded object from the
// packfile with the given offset.
func (p *Packfile) GetSizeByOffset(o int64) (size int64, err error) {
if _, err := p.s.SeekFromStart(o); err != nil {
if err == io.EOF || isInvalid(err) {
return 0, plumbing.ErrObjectNotFound
}
return 0, err
}
h, err := p.nextObjectHeader()
if err != nil {
return 0, err
}
return p.getObjectSize(h)
}
func (p *Packfile) objectHeaderAtOffset(offset int64) (*ObjectHeader, error) {
h, err := p.s.SeekObjectHeader(offset)
p.s.pendingObject = nil
return h, err
}
func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) {
h, err := p.s.NextObjectHeader()
p.s.pendingObject = nil
return h, err
}
func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
switch h.Type {
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
return h.Length, nil
case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
buf := bufPool.Get().(*bytes.Buffer)
buf.Reset()
defer bufPool.Put(buf)
if _, _, err := p.s.NextObject(buf); err != nil {
return 0, err
}
delta := buf.Bytes()
_, delta = decodeLEB128(delta) // skip src size
sz, _ := decodeLEB128(delta)
return int64(sz), nil
default:
return 0, ErrInvalidObject.AddDetails("type %q", h.Type)
}
}
func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err error) {
switch h.Type {
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
return h.Type, nil
case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
var offset int64
if h.Type == plumbing.REFDeltaObject {
offset, err = p.FindOffset(h.Reference)
if err != nil {
return
}
} else {
offset = h.OffsetReference
}
if baseType, ok := p.offsetToType[offset]; ok {
typ = baseType
} else {
h, err = p.objectHeaderAtOffset(offset)
if err != nil {
return
}
typ, err = p.getObjectType(h)
if err != nil {
return
}
}
default:
err = ErrInvalidObject.AddDetails("type %q", h.Type)
}
return
}
func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) {
h, err := p.objectHeaderAtOffset(offset)
if err != nil {
if err == io.EOF || isInvalid(err) {
return nil, plumbing.ErrObjectNotFound
}
return nil, err
}
// If we have no filesystem, we will return a MemoryObject instead
// of an FSObject.
if p.fs == nil {
return p.getNextObject(h)
}
// If the object is not a delta and it's small enough then read it
// completely into memory now since it is already read from disk
// into the buffer anyway.
if h.Length <= smallObjectThreshold && h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject {
return p.getNextObject(h)
}
hash, err := p.FindHash(h.Offset)
if err != nil {
return nil, err
}
size, err := p.getObjectSize(h)
if err != nil {
return nil, err
}
typ, err := p.getObjectType(h)
if err != nil {
return nil, err
}
p.offsetToType[h.Offset] = typ
return NewFSObject(
hash,
typ,
h.Offset,
size,
p.Index,
p.fs,
p.file.Name(),
p.deltaBaseCache,
), nil
}
func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
ref, err := p.FindHash(offset)
if err == nil {
obj, ok := p.cacheGet(ref)
if ok {
reader, err := obj.Reader()
if err != nil {
return nil, err
}
return reader, nil
}
}
h, err := p.objectHeaderAtOffset(offset)
if err != nil {
return nil, err
}
obj, err := p.getNextObject(h)
if err != nil {
return nil, err
}
return obj.Reader()
}
func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
var obj = new(plumbing.MemoryObject)
obj.SetSize(h.Length)
obj.SetType(h.Type)
var err error
switch h.Type {
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
err = p.fillRegularObjectContent(obj)
case plumbing.REFDeltaObject:
err = p.fillREFDeltaObjectContent(obj, h.Reference)
case plumbing.OFSDeltaObject:
err = p.fillOFSDeltaObjectContent(obj, h.OffsetReference)
default:
err = ErrInvalidObject.AddDetails("type %q", h.Type)
}
if err != nil {
return nil, err
}
return obj, nil
}
func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error {
w, err := obj.Writer()
if err != nil {
return err
}
_, _, err = p.s.NextObject(w)
p.cachePut(obj)
return err
}
func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) error {
buf := bufPool.Get().(*bytes.Buffer)
buf.Reset()
_, _, err := p.s.NextObject(buf)
if err != nil {
return err
}
base, ok := p.cacheGet(ref)
if !ok {
base, err = p.Get(ref)
if err != nil {
return err
}
}
obj.SetType(base.Type())
err = ApplyDelta(obj, base, buf.Bytes())
p.cachePut(obj)
bufPool.Put(buf)
return err
}
func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error {
buf := bytes.NewBuffer(nil)
_, _, err := p.s.NextObject(buf)
if err != nil {
return err
}
var base plumbing.EncodedObject
var ok bool
hash, err := p.FindHash(offset)
if err == nil {
base, ok = p.cacheGet(hash)
}
if !ok {
base, err = p.GetByOffset(offset)
if err != nil {
return err
}
}
obj.SetType(base.Type())
err = ApplyDelta(obj, base, buf.Bytes())
p.cachePut(obj)
return err
}
func (p *Packfile) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) {
if p.deltaBaseCache == nil {
return nil, false
}
return p.deltaBaseCache.Get(h)
}
func (p *Packfile) cachePut(obj plumbing.EncodedObject) {
if p.deltaBaseCache == nil {
return
}
p.deltaBaseCache.Put(obj)
}
// GetAll returns an iterator with all encoded objects in the packfile.
// The returned iterator is not thread-safe; it should be used in the same
// goroutine as the Packfile instance.
func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) {
return p.GetByType(plumbing.AnyObject)
}
// GetByType returns all the objects of the given type.
func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter, error) {
switch typ {
case plumbing.AnyObject,
plumbing.BlobObject,
plumbing.TreeObject,
plumbing.CommitObject,
plumbing.TagObject:
entries, err := p.EntriesByOffset()
if err != nil {
return nil, err
}
return &objectIter{
// Easiest way to provide an object decoder is just to pass a Packfile
// instance. To not mess with the seeks, it's a new instance with a
// different scanner but the same cache and offset to hash map for
// reusing as much cache as possible.
p: p,
iter: entries,
typ: typ,
}, nil
default:
return nil, plumbing.ErrInvalidType
}
}
// ID returns the ID of the packfile, which is the checksum at the end of it.
func (p *Packfile) ID() (plumbing.Hash, error) {
prev, err := p.file.Seek(-20, io.SeekEnd)
if err != nil {
return plumbing.ZeroHash, err
}
var hash plumbing.Hash
if _, err := io.ReadFull(p.file, hash[:]); err != nil {
return plumbing.ZeroHash, err
}
if _, err := p.file.Seek(prev, io.SeekStart); err != nil {
return plumbing.ZeroHash, err
}
return hash, nil
}
// Close the packfile and its resources.
func (p *Packfile) Close() error {
closer, ok := p.file.(io.Closer)
if !ok {
return nil
}
return closer.Close()
}
type objectIter struct {
p *Packfile
typ plumbing.ObjectType
iter idxfile.EntryIter
}
func (i *objectIter) Next() (plumbing.EncodedObject, error) {
for {
e, err := i.iter.Next()
if err != nil {
return nil, err
}
obj, err := i.p.GetByOffset(int64(e.Offset))
if err != nil {
return nil, err
}
if i.typ == plumbing.AnyObject || obj.Type() == i.typ {
return obj, nil
}
}
}
func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error {
for {
o, err := i.Next()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
if err := f(o); err != nil {
return err
}
}
}
func (i *objectIter) Close() {
i.iter.Close()
}
// isInvalid checks whether an error is an os.PathError with an os.ErrInvalid
// error inside. It also checks for the windows error, which is different from
// os.ErrInvalid.
func isInvalid(err error) bool {
pe, ok := err.(*os.PathError)
if !ok {
return false
}
errstr := pe.Err.Error()
return errstr == errInvalidUnix || errstr == errInvalidWindows
}
// errInvalidWindows is the Windows equivalent to os.ErrInvalid
const errInvalidWindows = "The parameter is incorrect."
var errInvalidUnix = os.ErrInvalid.Error()

View File

@ -0,0 +1,483 @@
package packfile
import (
"bytes"
"errors"
"io"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
var (
// ErrReferenceDeltaNotFound is returned when the reference delta is not
// found.
ErrReferenceDeltaNotFound = errors.New("reference delta not found")
// ErrNotSeekableSource is returned when the source for the parser is not
// seekable and a storage was not provided, so it can't be parsed.
ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided")
// ErrDeltaNotCached is returned when the delta could not be found in cache.
ErrDeltaNotCached = errors.New("delta could not be found in cache")
)
// Observer interface is implemented by index encoders.
type Observer interface {
// OnHeader is called when a new packfile is opened.
OnHeader(count uint32) error
// OnInflatedObjectHeader is called for each object header read.
OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error
// OnInflatedObjectContent is called for each decoded object.
OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error
// OnFooter is called when decoding is done.
OnFooter(h plumbing.Hash) error
}
// Parser decodes a packfile and calls any observers associated with it. It is
// used to generate indexes.
type Parser struct {
storage storer.EncodedObjectStorer
scanner *Scanner
count uint32
oi []*objectInfo
oiByHash map[plumbing.Hash]*objectInfo
oiByOffset map[int64]*objectInfo
hashOffset map[plumbing.Hash]int64
checksum plumbing.Hash
cache *cache.BufferLRU
// delta content by offset, only used if source is not seekable
deltas map[int64][]byte
ob []Observer
}
// NewParser creates a new Parser. The Scanner source must be seekable.
// If it's not, NewParserWithStorage should be used instead.
func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) {
return NewParserWithStorage(scanner, nil, ob...)
}
// NewParserWithStorage creates a new Parser. The scanner source must either
// be seekable or a storage must be provided.
func NewParserWithStorage(
scanner *Scanner,
storage storer.EncodedObjectStorer,
ob ...Observer,
) (*Parser, error) {
if !scanner.IsSeekable && storage == nil {
return nil, ErrNotSeekableSource
}
var deltas map[int64][]byte
if !scanner.IsSeekable {
deltas = make(map[int64][]byte)
}
return &Parser{
storage: storage,
scanner: scanner,
ob: ob,
count: 0,
cache: cache.NewBufferLRUDefault(),
deltas: deltas,
}, nil
}
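
A sketch of the observer mechanism: the countObserver below is hypothetical, and the packfile path is a placeholder. Because os.File is an io.ReadSeeker, the scanner is seekable and no storage is needed:

package main

import (
	"fmt"
	"os"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
)

// countObserver is a hypothetical Observer that records the object count
// and prints each decoded object.
type countObserver struct {
	objects uint32
}

func (c *countObserver) OnHeader(count uint32) error { c.objects = count; return nil }
func (c *countObserver) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error {
	return nil
}
func (c *countObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error {
	fmt.Printf("%s at %d (crc %08x, %d bytes)\n", h, pos, crc, len(content))
	return nil
}
func (c *countObserver) OnFooter(h plumbing.Hash) error { return nil }

func main() {
	f, err := os.Open("example.pack") // hypothetical packfile on disk
	if err != nil {
		panic(err)
	}
	defer f.Close()

	obs := &countObserver{}
	parser, err := packfile.NewParser(packfile.NewScanner(f), obs)
	if err != nil {
		panic(err)
	}
	checksum, err := parser.Parse()
	if err != nil {
		panic(err)
	}
	fmt.Printf("%d objects, checksum %s\n", obs.objects, checksum)
}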
func (p *Parser) forEachObserver(f func(o Observer) error) error {
for _, o := range p.ob {
if err := f(o); err != nil {
return err
}
}
return nil
}
func (p *Parser) onHeader(count uint32) error {
return p.forEachObserver(func(o Observer) error {
return o.OnHeader(count)
})
}
func (p *Parser) onInflatedObjectHeader(
t plumbing.ObjectType,
objSize int64,
pos int64,
) error {
return p.forEachObserver(func(o Observer) error {
return o.OnInflatedObjectHeader(t, objSize, pos)
})
}
func (p *Parser) onInflatedObjectContent(
h plumbing.Hash,
pos int64,
crc uint32,
content []byte,
) error {
return p.forEachObserver(func(o Observer) error {
return o.OnInflatedObjectContent(h, pos, crc, content)
})
}
func (p *Parser) onFooter(h plumbing.Hash) error {
return p.forEachObserver(func(o Observer) error {
return o.OnFooter(h)
})
}
// Parse start decoding phase of the packfile.
func (p *Parser) Parse() (plumbing.Hash, error) {
if err := p.init(); err != nil {
return plumbing.ZeroHash, err
}
if err := p.indexObjects(); err != nil {
return plumbing.ZeroHash, err
}
var err error
p.checksum, err = p.scanner.Checksum()
if err != nil && err != io.EOF {
return plumbing.ZeroHash, err
}
if err := p.resolveDeltas(); err != nil {
return plumbing.ZeroHash, err
}
if err := p.onFooter(p.checksum); err != nil {
return plumbing.ZeroHash, err
}
return p.checksum, nil
}
func (p *Parser) init() error {
_, c, err := p.scanner.Header()
if err != nil {
return err
}
if err := p.onHeader(c); err != nil {
return err
}
p.count = c
p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count)
p.oiByOffset = make(map[int64]*objectInfo, p.count)
p.oi = make([]*objectInfo, p.count)
return nil
}
func (p *Parser) indexObjects() error {
buf := new(bytes.Buffer)
for i := uint32(0); i < p.count; i++ {
buf.Reset()
oh, err := p.scanner.NextObjectHeader()
if err != nil {
return err
}
delta := false
var ota *objectInfo
switch t := oh.Type; t {
case plumbing.OFSDeltaObject:
delta = true
parent, ok := p.oiByOffset[oh.OffsetReference]
if !ok {
return plumbing.ErrObjectNotFound
}
ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
parent.Children = append(parent.Children, ota)
case plumbing.REFDeltaObject:
delta = true
parent, ok := p.oiByHash[oh.Reference]
if !ok {
// can't find referenced object in this pack file
// this must be a "thin" pack.
parent = &objectInfo{ // Placeholder parent
SHA1: oh.Reference,
ExternalRef: true, // mark as an external reference that must be resolved
Type: plumbing.AnyObject,
DiskType: plumbing.AnyObject,
}
p.oiByHash[oh.Reference] = parent
}
ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
parent.Children = append(parent.Children, ota)
default:
ota = newBaseObject(oh.Offset, oh.Length, t)
}
_, crc, err := p.scanner.NextObject(buf)
if err != nil {
return err
}
ota.Crc32 = crc
ota.Length = oh.Length
data := buf.Bytes()
if !delta {
sha1, err := getSHA1(ota.Type, data)
if err != nil {
return err
}
ota.SHA1 = sha1
p.oiByHash[ota.SHA1] = ota
}
if p.storage != nil && !delta {
obj := new(plumbing.MemoryObject)
obj.SetSize(oh.Length)
obj.SetType(oh.Type)
if _, err := obj.Write(data); err != nil {
return err
}
if _, err := p.storage.SetEncodedObject(obj); err != nil {
return err
}
}
if delta && !p.scanner.IsSeekable {
p.deltas[oh.Offset] = make([]byte, len(data))
copy(p.deltas[oh.Offset], data)
}
p.oiByOffset[oh.Offset] = ota
p.oi[i] = ota
}
return nil
}
func (p *Parser) resolveDeltas() error {
for _, obj := range p.oi {
content, err := p.get(obj)
if err != nil {
return err
}
if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil {
return err
}
if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil {
return err
}
if !obj.IsDelta() && len(obj.Children) > 0 {
for _, child := range obj.Children {
if _, err := p.resolveObject(child, content); err != nil {
return err
}
}
// Remove the delta from the cache.
if obj.DiskType.IsDelta() && !p.scanner.IsSeekable {
delete(p.deltas, obj.Offset)
}
}
}
return nil
}
func (p *Parser) get(o *objectInfo) (b []byte, err error) {
var ok bool
if !o.ExternalRef { // skip cache check for placeholder parents
b, ok = p.cache.Get(o.Offset)
}
// If it's not in the cache and is not a delta, we can try to find it in the
// storage, if there is one. External refs must enter here.
if !ok && p.storage != nil && !o.Type.IsDelta() {
e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1)
if err != nil {
return nil, err
}
o.Type = e.Type()
r, err := e.Reader()
if err != nil {
return nil, err
}
b = make([]byte, e.Size())
// io.ReadFull, unlike a bare Read, guarantees the whole object is read.
if _, err = io.ReadFull(r, b); err != nil {
return nil, err
}
}
if b != nil {
return b, nil
}
if o.ExternalRef {
// we were not able to resolve a ref in a thin pack
return nil, ErrReferenceDeltaNotFound
}
var data []byte
if o.DiskType.IsDelta() {
base, err := p.get(o.Parent)
if err != nil {
return nil, err
}
data, err = p.resolveObject(o, base)
if err != nil {
return nil, err
}
} else {
data, err = p.readData(o)
if err != nil {
return nil, err
}
}
if len(o.Children) > 0 {
p.cache.Put(o.Offset, data)
}
return data, nil
}
func (p *Parser) resolveObject(
o *objectInfo,
base []byte,
) ([]byte, error) {
if !o.DiskType.IsDelta() {
return nil, nil
}
data, err := p.readData(o)
if err != nil {
return nil, err
}
data, err = applyPatchBase(o, data, base)
if err != nil {
return nil, err
}
if p.storage != nil {
obj := new(plumbing.MemoryObject)
obj.SetSize(o.Size())
obj.SetType(o.Type)
if _, err := obj.Write(data); err != nil {
return nil, err
}
if _, err := p.storage.SetEncodedObject(obj); err != nil {
return nil, err
}
}
return data, nil
}
func (p *Parser) readData(o *objectInfo) ([]byte, error) {
if !p.scanner.IsSeekable && o.DiskType.IsDelta() {
data, ok := p.deltas[o.Offset]
if !ok {
return nil, ErrDeltaNotCached
}
return data, nil
}
if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil {
return nil, err
}
buf := new(bytes.Buffer)
if _, _, err := p.scanner.NextObject(buf); err != nil {
return nil, err
}
return buf.Bytes(), nil
}
func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
patched, err := PatchDelta(base, data)
if err != nil {
return nil, err
}
if ota.SHA1 == plumbing.ZeroHash {
ota.Type = ota.Parent.Type
sha1, err := getSHA1(ota.Type, patched)
if err != nil {
return nil, err
}
ota.SHA1 = sha1
ota.Length = int64(len(patched))
}
return patched, nil
}
func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) {
hasher := plumbing.NewHasher(t, int64(len(data)))
if _, err := hasher.Write(data); err != nil {
return plumbing.ZeroHash, err
}
return hasher.Sum(), nil
}
type objectInfo struct {
Offset int64
Length int64
Type plumbing.ObjectType
DiskType plumbing.ObjectType
ExternalRef bool // indicates this is an external reference in a thin pack file
Crc32 uint32
Parent *objectInfo
Children []*objectInfo
SHA1 plumbing.Hash
}
func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo {
return newDeltaObject(offset, length, t, nil)
}
func newDeltaObject(
offset, length int64,
t plumbing.ObjectType,
parent *objectInfo,
) *objectInfo {
obj := &objectInfo{
Offset: offset,
Length: length,
Type: t,
DiskType: t,
Crc32: 0,
Parent: parent,
}
return obj
}
func (o *objectInfo) IsDelta() bool {
return o.Type.IsDelta()
}
func (o *objectInfo) Size() int64 {
return o.Length
}

View File

@ -0,0 +1,229 @@
package packfile
import (
"errors"
"io/ioutil"
"gopkg.in/src-d/go-git.v4/plumbing"
)
// See https://github.com/git/git/blob/49fa3dc76179e04b0833542fa52d0f287a4955ac/delta.h
// https://github.com/git/git/blob/c2c5f6b1e479f2c38e0e01345350620944e3527f/patch-delta.c,
// and https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js
// for details about the delta format.
const deltaSizeMin = 4
// ApplyDelta writes to target the result of applying the modification deltas in delta to base.
func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) error {
r, err := base.Reader()
if err != nil {
return err
}
w, err := target.Writer()
if err != nil {
return err
}
src, err := ioutil.ReadAll(r)
if err != nil {
return err
}
dst, err := PatchDelta(src, delta)
if err != nil {
return err
}
target.SetSize(int64(len(dst)))
_, err = w.Write(dst)
return err
}
var (
ErrInvalidDelta = errors.New("invalid delta")
ErrDeltaCmd = errors.New("wrong delta command")
)
// PatchDelta returns the result of applying the modification deltas in delta to src.
// An error will be returned if delta is corrupted (ErrInvalidDelta) or an action command
// is not copy from source or copy from delta (ErrDeltaCmd).
func PatchDelta(src, delta []byte) ([]byte, error) {
if len(delta) < deltaSizeMin {
return nil, ErrInvalidDelta
}
srcSz, delta := decodeLEB128(delta)
if srcSz != uint(len(src)) {
return nil, ErrInvalidDelta
}
targetSz, delta := decodeLEB128(delta)
remainingTargetSz := targetSz
var cmd byte
dest := make([]byte, 0, targetSz)
for {
if len(delta) == 0 {
return nil, ErrInvalidDelta
}
cmd = delta[0]
delta = delta[1:]
if isCopyFromSrc(cmd) {
var offset, sz uint
var err error
offset, delta, err = decodeOffset(cmd, delta)
if err != nil {
return nil, err
}
sz, delta, err = decodeSize(cmd, delta)
if err != nil {
return nil, err
}
if invalidSize(sz, targetSz) ||
invalidOffsetSize(offset, sz, srcSz) {
break
}
dest = append(dest, src[offset:offset+sz]...)
remainingTargetSz -= sz
} else if isCopyFromDelta(cmd) {
sz := uint(cmd) // cmd is the size itself
if invalidSize(sz, targetSz) {
return nil, ErrInvalidDelta
}
if uint(len(delta)) < sz {
return nil, ErrInvalidDelta
}
dest = append(dest, delta[0:sz]...)
remainingTargetSz -= sz
delta = delta[sz:]
} else {
return nil, ErrDeltaCmd
}
if remainingTargetSz <= 0 {
break
}
}
return dest, nil
}
// Decodes a number encoded as an unsigned LEB128 at the start of some
// binary data and returns the decoded number and the rest of the
// stream.
//
// This must be called twice on the delta data buffer, first to get the
// expected source buffer size, and again to get the target buffer size.
func decodeLEB128(input []byte) (uint, []byte) {
var num, sz uint
var b byte
for {
b = input[sz]
num |= (uint(b) & payload) << (sz * 7) // concatenate 7-bit chunks
sz++
if uint(b)&continuation == 0 || sz == uint(len(input)) {
break
}
}
return num, input[sz:]
}
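
For intuition, a standalone trace of this loop on the classic three-byte LEB128 example, with payload and continuation inlined as 0x7f and 0x80:

package main

import "fmt"

func main() {
	// 0xe5 = 1110_0101: payload 0x65, continuation set.
	// 0x8e = 1000_1110: payload 0x0e, continuation set.
	// 0x26 = 0010_0110: payload 0x26, continuation clear; stop.
	input := []byte{0xe5, 0x8e, 0x26}

	var num, sz uint
	for {
		b := input[sz]
		num |= (uint(b) & 0x7f) << (sz * 7)
		sz++
		if uint(b)&0x80 == 0 || sz == uint(len(input)) {
			break
		}
	}
	fmt.Println(num) // 0x65 + 0x0e<<7 + 0x26<<14 = 624485
}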
const (
payload = 0x7f // 0111 1111
continuation = 0x80 // 1000 0000
)
func isCopyFromSrc(cmd byte) bool {
return (cmd & 0x80) != 0
}
func isCopyFromDelta(cmd byte) bool {
return (cmd&0x80) == 0 && cmd != 0
}
func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
var offset uint
if (cmd & 0x01) != 0 {
if len(delta) == 0 {
return 0, nil, ErrInvalidDelta
}
offset = uint(delta[0])
delta = delta[1:]
}
if (cmd & 0x02) != 0 {
if len(delta) == 0 {
return 0, nil, ErrInvalidDelta
}
offset |= uint(delta[0]) << 8
delta = delta[1:]
}
if (cmd & 0x04) != 0 {
if len(delta) == 0 {
return 0, nil, ErrInvalidDelta
}
offset |= uint(delta[0]) << 16
delta = delta[1:]
}
if (cmd & 0x08) != 0 {
if len(delta) == 0 {
return 0, nil, ErrInvalidDelta
}
offset |= uint(delta[0]) << 24
delta = delta[1:]
}
return offset, delta, nil
}
func decodeSize(cmd byte, delta []byte) (uint, []byte, error) {
var sz uint
if (cmd & 0x10) != 0 {
if len(delta) == 0 {
return 0, nil, ErrInvalidDelta
}
sz = uint(delta[0])
delta = delta[1:]
}
if (cmd & 0x20) != 0 {
if len(delta) == 0 {
return 0, nil, ErrInvalidDelta
}
sz |= uint(delta[0]) << 8
delta = delta[1:]
}
if (cmd & 0x40) != 0 {
if len(delta) == 0 {
return 0, nil, ErrInvalidDelta
}
sz |= uint(delta[0]) << 16
delta = delta[1:]
}
if sz == 0 {
sz = 0x10000
}
return sz, delta, nil
}
func invalidSize(sz, targetSz uint) bool {
return sz > targetSz
}
func invalidOffsetSize(offset, sz, srcSz uint) bool {
return sumOverflows(offset, sz) ||
offset+sz > srcSz
}
func sumOverflows(a, b uint) bool {
return a+b < a
}

View File

@ -0,0 +1,487 @@
package packfile
import (
"bufio"
"bytes"
"compress/zlib"
"fmt"
"hash"
"hash/crc32"
"io"
stdioutil "io/ioutil"
"sync"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
"gopkg.in/src-d/go-git.v4/utils/ioutil"
)
var (
// ErrEmptyPackfile is returned by Header when no data is found in the packfile
ErrEmptyPackfile = NewError("empty packfile")
// ErrBadSignature is returned by Header when the signature in the packfile is incorrect.
ErrBadSignature = NewError("malformed pack file signature")
// ErrUnsupportedVersion is returned by Header when the packfile version is
// different than VersionSupported.
ErrUnsupportedVersion = NewError("unsupported packfile version")
// ErrSeekNotSupported is returned when seeking is not supported
ErrSeekNotSupported = NewError("not seek support")
)
// ObjectHeader contains the information related to the object; this information
// is collected from the bytes preceding the object's content.
type ObjectHeader struct {
Type plumbing.ObjectType
Offset int64
Length int64
Reference plumbing.Hash
OffsetReference int64
}
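// Scanner reads the objects of a packfile sequentially from an input stream,
// keeping a CRC32 of each object's compressed data as it is read.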
type Scanner struct {
r reader
zr readerResetter
crc hash.Hash32
// pendingObject is used to detect whether an object has been read
// or is still waiting to be read
pendingObject *ObjectHeader
version, objects uint32
// IsSeekable says whether this scanner can Seek; to have a seekable
// Scanner, an r implementing io.Seeker is required
IsSeekable bool
}
// NewScanner returns a new Scanner based on a reader. If the given reader
// implements io.ReadSeeker, the Scanner will also be seekable
func NewScanner(r io.Reader) *Scanner {
seeker, ok := r.(io.ReadSeeker)
if !ok {
seeker = &trackableReader{Reader: r}
}
crc := crc32.NewIEEE()
return &Scanner{
r: newTeeReader(newByteReadSeeker(seeker), crc),
crc: crc,
IsSeekable: ok,
}
}
// Header reads the whole packfile header (signature, version and object count).
// It returns the version and the object count and performs checks on the
// validity of the signature and the version fields.
func (s *Scanner) Header() (version, objects uint32, err error) {
if s.version != 0 {
return s.version, s.objects, nil
}
sig, err := s.readSignature()
if err != nil {
if err == io.EOF {
err = ErrEmptyPackfile
}
return
}
if !s.isValidSignature(sig) {
err = ErrBadSignature
return
}
version, err = s.readVersion()
s.version = version
if err != nil {
return
}
if !s.isSupportedVersion(version) {
err = ErrUnsupportedVersion.AddDetails("%d", version)
return
}
objects, err = s.readCount()
s.objects = objects
return
}
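
A usage sketch that walks a packfile with the scanner alone; the file name is a hypothetical placeholder:

package main

import (
	"fmt"
	"io/ioutil"
	"os"

	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
)

func main() {
	f, err := os.Open("example.pack") // hypothetical packfile on disk
	if err != nil {
		panic(err)
	}
	defer f.Close()

	s := packfile.NewScanner(f)
	version, objects, err := s.Header()
	if err != nil {
		panic(err)
	}
	fmt.Printf("version %d, %d objects\n", version, objects)

	for i := uint32(0); i < objects; i++ {
		h, err := s.NextObjectHeader()
		if err != nil {
			panic(err)
		}
		// Inflate the entry; here we only want its size and CRC.
		n, crc, err := s.NextObject(ioutil.Discard)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%-6v at %6d: %d bytes (crc %08x)\n", h.Type, h.Offset, n, crc)
	}
}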
// readSignature reads and returns the signature field in the packfile.
func (s *Scanner) readSignature() ([]byte, error) {
var sig = make([]byte, 4)
if _, err := io.ReadFull(s.r, sig); err != nil {
return []byte{}, err
}
return sig, nil
}
// isValidSignature reports whether sig is a valid packfile signature.
func (s *Scanner) isValidSignature(sig []byte) bool {
return bytes.Equal(sig, signature)
}
// readVersion reads and returns the version field of a packfile.
func (s *Scanner) readVersion() (uint32, error) {
return binary.ReadUint32(s.r)
}
// isSupportedVersion returns whether version v is supported by the parser.
// The current supported version is VersionSupported, defined above.
func (s *Scanner) isSupportedVersion(v uint32) bool {
return v == VersionSupported
}
// readCount reads and returns the count of objects field of a packfile.
func (s *Scanner) readCount() (uint32, error) {
return binary.ReadUint32(s.r)
}
// SeekObjectHeader seeks to the specified offset and returns the ObjectHeader
// for the next object in the reader
func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) {
// if seeking we assume that you are not interested in the header
if s.version == 0 {
s.version = VersionSupported
}
if _, err := s.r.Seek(offset, io.SeekStart); err != nil {
return nil, err
}
h, err := s.nextObjectHeader()
if err != nil {
return nil, err
}
h.Offset = offset
return h, nil
}
// NextObjectHeader returns the ObjectHeader for the next object in the reader
func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
if err := s.doPending(); err != nil {
return nil, err
}
offset, err := s.r.Seek(0, io.SeekCurrent)
if err != nil {
return nil, err
}
h, err := s.nextObjectHeader()
if err != nil {
return nil, err
}
h.Offset = offset
return h, nil
}
// nextObjectHeader returns the ObjectHeader for the next object in the reader
// without the Offset field
func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) {
defer s.Flush()
s.crc.Reset()
h := &ObjectHeader{}
s.pendingObject = h
var err error
h.Offset, err = s.r.Seek(0, io.SeekCurrent)
if err != nil {
return nil, err
}
h.Type, h.Length, err = s.readObjectTypeAndLength()
if err != nil {
return nil, err
}
switch h.Type {
case plumbing.OFSDeltaObject:
no, err := binary.ReadVariableWidthInt(s.r)
if err != nil {
return nil, err
}
h.OffsetReference = h.Offset - no
case plumbing.REFDeltaObject:
var err error
h.Reference, err = binary.ReadHash(s.r)
if err != nil {
return nil, err
}
}
return h, nil
}
func (s *Scanner) doPending() error {
if s.version == 0 {
var err error
s.version, s.objects, err = s.Header()
if err != nil {
return err
}
}
return s.discardObjectIfNeeded()
}
func (s *Scanner) discardObjectIfNeeded() error {
if s.pendingObject == nil {
return nil
}
h := s.pendingObject
n, _, err := s.NextObject(stdioutil.Discard)
if err != nil {
return err
}
if n != h.Length {
return fmt.Errorf(
"error discarding object, discarded %d, expected %d",
n, h.Length,
)
}
return nil
}
// readObjectTypeAndLength reads and returns the object type and the
// length field from an object entry in a packfile.
func (s *Scanner) readObjectTypeAndLength() (plumbing.ObjectType, int64, error) {
t, c, err := s.readType()
if err != nil {
return t, 0, err
}
l, err := s.readLength(c)
return t, l, err
}
func (s *Scanner) readType() (plumbing.ObjectType, byte, error) {
var c byte
var err error
if c, err = s.r.ReadByte(); err != nil {
return plumbing.ObjectType(0), 0, err
}
typ := parseType(c)
return typ, c, nil
}
func parseType(b byte) plumbing.ObjectType {
return plumbing.ObjectType((b & maskType) >> firstLengthBits)
}
// the length is encoded in the low 4 bits of the first byte and in
// the low 7 bits of subsequent bytes. The last byte has a 0 MSB.
func (s *Scanner) readLength(first byte) (int64, error) {
length := int64(first & maskFirstLength)
c := first
shift := firstLengthBits
var err error
for c&maskContinue > 0 {
if c, err = s.r.ReadByte(); err != nil {
return 0, err
}
length += int64(c&maskLength) << shift
shift += lengthBits
}
return length, nil
}
// NextObject writes the content of the next object into the writer; it returns
// the number of bytes written, the CRC32 of the content and an error, if any
func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) {
defer s.crc.Reset()
s.pendingObject = nil
written, err = s.copyObject(w)
s.Flush()
crc32 = s.crc.Sum32()
return
}
// copyObject reads and writes a non-deltified object
// from its zlib stream in an object entry in the packfile.
func (s *Scanner) copyObject(w io.Writer) (n int64, err error) {
if s.zr == nil {
var zr io.ReadCloser
zr, err = zlib.NewReader(s.r)
if err != nil {
return 0, fmt.Errorf("zlib initialization error: %s", err)
}
s.zr = zr.(readerResetter)
} else {
if err = s.zr.Reset(s.r, nil); err != nil {
return 0, fmt.Errorf("zlib reset error: %s", err)
}
}
defer ioutil.CheckClose(s.zr, &err)
buf := byteSlicePool.Get().([]byte)
n, err = io.CopyBuffer(w, s.zr, buf)
byteSlicePool.Put(buf)
return
}
var byteSlicePool = sync.Pool{
New: func() interface{} {
return make([]byte, 32*1024)
},
}
// SeekFromStart sets a new offset from the start and returns the position
// before the change.
func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) {
// if seeking we assume that you are not interested in the header
if s.version == 0 {
s.version = VersionSupported
}
previous, err = s.r.Seek(0, io.SeekCurrent)
if err != nil {
return -1, err
}
_, err = s.r.Seek(offset, io.SeekStart)
return previous, err
}
// Checksum returns the checksum of the packfile
func (s *Scanner) Checksum() (plumbing.Hash, error) {
err := s.discardObjectIfNeeded()
if err != nil {
return plumbing.ZeroHash, err
}
return binary.ReadHash(s.r)
}
// Close reads the reader until io.EOF
func (s *Scanner) Close() error {
buf := byteSlicePool.Get().([]byte)
_, err := io.CopyBuffer(stdioutil.Discard, s.r, buf)
byteSlicePool.Put(buf)
return err
}
// Flush finishes writing the buffer to the crc hasher in case we are using
// a teeReader. Otherwise it is a no-op.
func (s *Scanner) Flush() error {
tee, ok := s.r.(*teeReader)
if ok {
return tee.Flush()
}
return nil
}
type trackableReader struct {
count int64
io.Reader
}
// Read reads up to len(p) bytes into p.
func (r *trackableReader) Read(p []byte) (n int, err error) {
n, err = r.Reader.Read(p)
r.count += int64(n)
return
}
// Seek only supports io.SeekCurrent; any other operation fails
func (r *trackableReader) Seek(offset int64, whence int) (int64, error) {
if whence != io.SeekCurrent {
return -1, ErrSeekNotSupported
}
return r.count, nil
}
func newByteReadSeeker(r io.ReadSeeker) *bufferedSeeker {
return &bufferedSeeker{
r: r,
Reader: *bufio.NewReader(r),
}
}
type bufferedSeeker struct {
r io.ReadSeeker
bufio.Reader
}
func (r *bufferedSeeker) Seek(offset int64, whence int) (int64, error) {
if whence == io.SeekCurrent && offset == 0 {
current, err := r.r.Seek(offset, whence)
if err != nil {
return current, err
}
return current - int64(r.Buffered()), nil
}
defer r.Reader.Reset(r.r)
return r.r.Seek(offset, whence)
}
type readerResetter interface {
io.ReadCloser
zlib.Resetter
}
type reader interface {
io.Reader
io.ByteReader
io.Seeker
}
type teeReader struct {
reader
w hash.Hash32
bufWriter *bufio.Writer
}
func newTeeReader(r reader, h hash.Hash32) *teeReader {
return &teeReader{
reader: r,
w: h,
bufWriter: bufio.NewWriter(h),
}
}
func (r *teeReader) Read(p []byte) (n int, err error) {
r.Flush()
n, err = r.reader.Read(p)
if n > 0 {
if n, err := r.w.Write(p[:n]); err != nil {
return n, err
}
}
return
}
func (r *teeReader) ReadByte() (b byte, err error) {
b, err = r.reader.ReadByte()
if err == nil {
return b, r.bufWriter.WriteByte(b)
}
return
}
func (r *teeReader) Flush() (err error) {
return r.bufWriter.Flush()
}

View File

@ -0,0 +1,122 @@
// Package pktline implements reading payloads from pkt-lines and encoding
// pkt-lines from payloads.
package pktline
import (
"bytes"
"errors"
"fmt"
"io"
)
// An Encoder writes pkt-lines to an output stream.
type Encoder struct {
w io.Writer
}
const (
// MaxPayloadSize is the maximum payload size of a pkt-line in bytes.
MaxPayloadSize = 65516
// For compatibility with the canonical Git implementation, accept longer pkt-lines
OversizePayloadMax = 65520
)
var (
// FlushPkt are the contents of a flush-pkt pkt-line.
FlushPkt = []byte{'0', '0', '0', '0'}
// Flush is the payload to use with the Encode method to encode a flush-pkt.
Flush = []byte{}
// FlushString is the payload to use with the EncodeString method to encode a flush-pkt.
FlushString = ""
// ErrPayloadTooLong is returned by the Encode methods when any of the
// provided payloads is bigger than MaxPayloadSize.
ErrPayloadTooLong = errors.New("payload is too long")
)
// NewEncoder returns a new encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
return &Encoder{
w: w,
}
}
// Flush encodes a flush-pkt to the output stream.
func (e *Encoder) Flush() error {
_, err := e.w.Write(FlushPkt)
return err
}
// Encode encodes a pkt-line with the payload specified and writes it to
// the output stream. If several payloads are specified, each of them
// will be streamed in its own pkt-line.
func (e *Encoder) Encode(payloads ...[]byte) error {
for _, p := range payloads {
if err := e.encodeLine(p); err != nil {
return err
}
}
return nil
}
func (e *Encoder) encodeLine(p []byte) error {
if len(p) > MaxPayloadSize {
return ErrPayloadTooLong
}
if bytes.Equal(p, Flush) {
return e.Flush()
}
n := len(p) + 4
if _, err := e.w.Write(asciiHex16(n)); err != nil {
return err
}
_, err := e.w.Write(p)
return err
}
// Returns the hexadecimal ascii representation of the 16 least
// significant bits of n. The length of the returned slice will always
// be 4. Example: if n is 1234 (0x4d2), the return value will be
// []byte{'0', '4', 'd', '2'}.
func asciiHex16(n int) []byte {
var ret [4]byte
ret[0] = byteToASCIIHex(byte(n & 0xf000 >> 12))
ret[1] = byteToASCIIHex(byte(n & 0x0f00 >> 8))
ret[2] = byteToASCIIHex(byte(n & 0x00f0 >> 4))
ret[3] = byteToASCIIHex(byte(n & 0x000f))
return ret[:]
}
// turns a byte into its hexadecimal ascii representation. Example:
// from 11 (0xb) to 'b'.
func byteToASCIIHex(n byte) byte {
if n < 10 {
return '0' + n
}
return 'a' - 10 + n
}
// EncodeString works similarly to Encode, but payloads are specified as strings.
func (e *Encoder) EncodeString(payloads ...string) error {
for _, p := range payloads {
if err := e.Encode([]byte(p)); err != nil {
return err
}
}
return nil
}
// Encodef encodes a single pkt-line with the payload formatted according to
// the format specifier. The rest of the arguments are interpolated into
// the format string.
func (e *Encoder) Encodef(format string, a ...interface{}) error {
return e.EncodeString(
fmt.Sprintf(format, a...),
)
}
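
A short usage sketch of the encoder; the payloads are arbitrary examples:

package main

import (
	"bytes"
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing/format/pktline"
)

func main() {
	var buf bytes.Buffer
	e := pktline.NewEncoder(&buf)

	e.EncodeString("hello\n")    // 4-digit hex length prefix + payload
	e.Encodef("version %d\n", 2) // formatted payload
	e.Flush()                    // flush-pkt: "0000"

	fmt.Printf("%q\n", buf.String())
	// "000ahello\n000eversion 2\n0000"
}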

View File

@ -0,0 +1,134 @@
package pktline
import (
"errors"
"io"
)
const (
lenSize = 4
)
// ErrInvalidPktLen is returned by Err() when an invalid pkt-len is found.
var ErrInvalidPktLen = errors.New("invalid pkt-len found")
// Scanner provides a convenient interface for reading the payloads of a
// series of pkt-lines. It takes an io.Reader providing the source,
// which then can be tokenized through repeated calls to the Scan
// method.
//
// After each Scan call, the Bytes method will return the payload of the
// corresponding pkt-line on a shared buffer, which will be 65516 bytes
// or smaller. Flush pkt-lines are represented by empty byte slices.
//
// Scanning stops at EOF or the first I/O error.
type Scanner struct {
r io.Reader // The reader provided by the client
err error // Sticky error
payload []byte // Last pkt-payload
len [lenSize]byte // Last pkt-len
}
// NewScanner returns a new Scanner to read from r.
func NewScanner(r io.Reader) *Scanner {
return &Scanner{
r: r,
}
}
// Err returns the first error encountered by the Scanner.
func (s *Scanner) Err() error {
return s.err
}
// Scan advances the Scanner to the next pkt-line, whose payload will
// then be available through the Bytes method. Scanning stops at EOF
// or the first I/O error. After Scan returns false, the Err method
// will return any error that occurred during scanning, except that if
// it was io.EOF, Err will return nil.
func (s *Scanner) Scan() bool {
var l int
l, s.err = s.readPayloadLen()
if s.err == io.EOF {
s.err = nil
return false
}
if s.err != nil {
return false
}
if cap(s.payload) < l {
s.payload = make([]byte, 0, l)
}
if _, s.err = io.ReadFull(s.r, s.payload[:l]); s.err != nil {
return false
}
s.payload = s.payload[:l]
return true
}
// Bytes returns the most recent payload generated by a call to Scan.
// The underlying array may point to data that will be overwritten by a
// subsequent call to Scan. It does no allocation.
func (s *Scanner) Bytes() []byte {
return s.payload
}
// Method readPayloadLen returns the payload length by reading the
// pkt-len and subtracting the pkt-len size.
func (s *Scanner) readPayloadLen() (int, error) {
if _, err := io.ReadFull(s.r, s.len[:]); err != nil {
if err == io.ErrUnexpectedEOF {
return 0, ErrInvalidPktLen
}
return 0, err
}
n, err := hexDecode(s.len)
if err != nil {
return 0, err
}
switch {
case n == 0:
return 0, nil
case n <= lenSize:
return 0, ErrInvalidPktLen
case n > OversizePayloadMax+lenSize:
return 0, ErrInvalidPktLen
default:
return n - lenSize, nil
}
}
// Turns the hexadecimal representation of a number in a byte slice into
// a number. This function substitutes strconv.ParseUint(string(buf), 16,
// 16) and/or hex.Decode, to avoid generating new strings, thus helping the
// GC.
func hexDecode(buf [lenSize]byte) (int, error) {
var ret int
for i := 0; i < lenSize; i++ {
n, err := asciiHexToByte(buf[i])
if err != nil {
return 0, ErrInvalidPktLen
}
ret = 16*ret + int(n)
}
return ret, nil
}
// turns the hexadecimal ascii representation of a byte into its
// numerical value. Example: from 'b' to 11 (0xb).
func asciiHexToByte(b byte) (byte, error) {
switch {
case b >= '0' && b <= '9':
return b - '0', nil
case b >= 'a' && b <= 'f':
return b - 'a' + 10, nil
default:
return 0, ErrInvalidPktLen
}
}
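
And the matching usage sketch for the scanner, consuming the stream produced in the encoder example above:

package main

import (
	"fmt"
	"strings"

	"gopkg.in/src-d/go-git.v4/plumbing/format/pktline"
)

func main() {
	input := "000ahello\n000eversion 2\n0000"
	s := pktline.NewScanner(strings.NewReader(input))

	for s.Scan() {
		payload := s.Bytes()
		if len(payload) == 0 {
			fmt.Println("flush-pkt") // flush-pkt payloads are empty slices
			continue
		}
		fmt.Printf("payload: %q\n", payload)
	}
	if err := s.Err(); err != nil {
		panic(err)
	}
}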