// Copyright 2014 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package profile provides a representation of profile.proto and // methods to encode/decode profiles in this format. package profile import ( "bytes" "compress/gzip" "fmt" "io" "math" "path/filepath" "regexp" "sort" "strings" "sync" "time" ) // Profile is an in-memory representation of profile.proto. type Profile struct { SampleType []*ValueType DefaultSampleType string Sample []*Sample Mapping []*Mapping Location []*Location Function []*Function Comments []string DocURL string DropFrames string KeepFrames string TimeNanos int64 DurationNanos int64 PeriodType *ValueType Period int64 // The following fields are modified during encoding and copying, // so are protected by a Mutex. encodeMu sync.Mutex commentX []int64 docURLX int64 dropFramesX int64 keepFramesX int64 stringTable []string defaultSampleTypeX int64 } // ValueType corresponds to Profile.ValueType type ValueType struct { Type string // cpu, wall, inuse_space, etc Unit string // seconds, nanoseconds, bytes, etc typeX int64 unitX int64 } // Sample corresponds to Profile.Sample type Sample struct { Location []*Location Value []int64 // Label is a per-label-key map to values for string labels. // // In general, having multiple values for the given label key is strongly // discouraged - see docs for the sample label field in profile.proto. The // main reason this unlikely state is tracked here is to make the // decoding->encoding roundtrip not lossy. But we expect that the value // slices present in this map are always of length 1. Label map[string][]string // NumLabel is a per-label-key map to values for numeric labels. See a note // above on handling multiple values for a label. NumLabel map[string][]int64 // NumUnit is a per-label-key map to the unit names of corresponding numeric // label values. The unit info may be missing even if the label is in // NumLabel, see the docs in profile.proto for details. When the value is // slice is present and not nil, its length must be equal to the length of // the corresponding value slice in NumLabel. NumUnit map[string][]string locationIDX []uint64 labelX []label } // label corresponds to Profile.Label type label struct { keyX int64 // Exactly one of the two following values must be set strX int64 numX int64 // Integer value for this label // can be set if numX has value unitX int64 } // Mapping corresponds to Profile.Mapping type Mapping struct { ID uint64 Start uint64 Limit uint64 Offset uint64 File string BuildID string HasFunctions bool HasFilenames bool HasLineNumbers bool HasInlineFrames bool fileX int64 buildIDX int64 // Name of the kernel relocation symbol ("_text" or "_stext"), extracted from File. // For linux kernel mappings generated by some tools, correct symbolization depends // on knowing which of the two possible relocation symbols was used for `Start`. // This is given to us as a suffix in `File` (e.g. "[kernel.kallsyms]_stext"). // // Note, this public field is not persisted in the proto. For the purposes of // copying / merging / hashing profiles, it is considered subsumed by `File`. KernelRelocationSymbol string } // Location corresponds to Profile.Location type Location struct { ID uint64 Mapping *Mapping Address uint64 Line []Line IsFolded bool mappingIDX uint64 } // Line corresponds to Profile.Line type Line struct { Function *Function Line int64 Column int64 functionIDX uint64 } // Function corresponds to Profile.Function type Function struct { ID uint64 Name string SystemName string Filename string StartLine int64 nameX int64 systemNameX int64 filenameX int64 } // Parse parses a profile and checks for its validity. The input // may be a gzip-compressed encoded protobuf or one of many legacy // profile formats which may be unsupported in the future. func Parse(r io.Reader) (*Profile, error) { data, err := io.ReadAll(r) if err != nil { return nil, err } return ParseData(data) } // ParseData parses a profile from a buffer and checks for its // validity. func ParseData(data []byte) (*Profile, error) { var p *Profile var err error if len(data) >= 2 && data[0] == 0x1f && data[1] == 0x8b { gz, err := gzip.NewReader(bytes.NewBuffer(data)) if err == nil { data, err = io.ReadAll(gz) } if err != nil { return nil, fmt.Errorf("decompressing profile: %v", err) } } if p, err = ParseUncompressed(data); err != nil && err != errNoData && err != errConcatProfile { p, err = parseLegacy(data) } if err != nil { return nil, fmt.Errorf("parsing profile: %v", err) } if err := p.CheckValid(); err != nil { return nil, fmt.Errorf("malformed profile: %v", err) } return p, nil } var errUnrecognized = fmt.Errorf("unrecognized profile format") var errMalformed = fmt.Errorf("malformed profile format") var errNoData = fmt.Errorf("empty input file") var errConcatProfile = fmt.Errorf("concatenated profiles detected") func parseLegacy(data []byte) (*Profile, error) { parsers := []func([]byte) (*Profile, error){ parseCPU, parseHeap, parseGoCount, // goroutine, threadcreate parseThread, parseContention, parseJavaProfile, } for _, parser := range parsers { p, err := parser(data) if err == nil { p.addLegacyFrameInfo() return p, nil } if err != errUnrecognized { return nil, err } } return nil, errUnrecognized } // ParseUncompressed parses an uncompressed protobuf into a profile. func ParseUncompressed(data []byte) (*Profile, error) { if len(data) == 0 { return nil, errNoData } p := &Profile{} if err := unmarshal(data, p); err != nil { return nil, err } if err := p.postDecode(); err != nil { return nil, err } return p, nil } var libRx = regexp.MustCompile(`([.]so$|[.]so[._][0-9]+)`) // massageMappings applies heuristic-based changes to the profile // mappings to account for quirks of some environments. func (p *Profile) massageMappings() { // Merge adjacent regions with matching names, checking that the offsets match if len(p.Mapping) > 1 { mappings := []*Mapping{p.Mapping[0]} for _, m := range p.Mapping[1:] { lm := mappings[len(mappings)-1] if adjacent(lm, m) { lm.Limit = m.Limit if m.File != "" { lm.File = m.File } if m.BuildID != "" { lm.BuildID = m.BuildID } p.updateLocationMapping(m, lm) continue } mappings = append(mappings, m) } p.Mapping = mappings } // Use heuristics to identify main binary and move it to the top of the list of mappings for i, m := range p.Mapping { file := strings.TrimSpace(strings.Replace(m.File, "(deleted)", "", -1)) if len(file) == 0 { continue } if len(libRx.FindStringSubmatch(file)) > 0 { continue } if file[0] == '[' { continue } // Swap what we guess is main to position 0. p.Mapping[0], p.Mapping[i] = p.Mapping[i], p.Mapping[0] break } // Keep the mapping IDs neatly sorted for i, m := range p.Mapping { m.ID = uint64(i + 1) } } // adjacent returns whether two mapping entries represent the same // mapping that has been split into two. Check that their addresses are adjacent, // and if the offsets match, if they are available. func adjacent(m1, m2 *Mapping) bool { if m1.File != "" && m2.File != "" { if m1.File != m2.File { return false } } if m1.BuildID != "" && m2.BuildID != "" { if m1.BuildID != m2.BuildID { return false } } if m1.Limit != m2.Start { return false } if m1.Offset != 0 && m2.Offset != 0 { offset := m1.Offset + (m1.Limit - m1.Start) if offset != m2.Offset { return false } } return true } func (p *Profile) updateLocationMapping(from, to *Mapping) { for _, l := range p.Location { if l.Mapping == from { l.Mapping = to } } } func serialize(p *Profile) []byte { p.encodeMu.Lock() p.preEncode() b := marshal(p) p.encodeMu.Unlock() return b } // Write writes the profile as a gzip-compressed marshaled protobuf. func (p *Profile) Write(w io.Writer) error { zw := gzip.NewWriter(w) defer zw.Close() _, err := zw.Write(serialize(p)) return err } // WriteUncompressed writes the profile as a marshaled protobuf. func (p *Profile) WriteUncompressed(w io.Writer) error { _, err := w.Write(serialize(p)) return err } // CheckValid tests whether the profile is valid. Checks include, but are // not limited to: // - len(Profile.Sample[n].value) == len(Profile.value_unit) // - Sample.id has a corresponding Profile.Location func (p *Profile) CheckValid() error { // Check that sample values are consistent sampleLen := len(p.SampleType) if sampleLen == 0 && len(p.Sample) != 0 { return fmt.Errorf("missing sample type information") } for _, s := range p.Sample { if s == nil { return fmt.Errorf("profile has nil sample") } if len(s.Value) != sampleLen { return fmt.Errorf("mismatch: sample has %d values vs. %d types", len(s.Value), len(p.SampleType)) } for _, l := range s.Location { if l == nil { return fmt.Errorf("sample has nil location") } } } // Check that all mappings/locations/functions are in the tables // Check that there are no duplicate ids mappings := make(map[uint64]*Mapping, len(p.Mapping)) for _, m := range p.Mapping { if m == nil { return fmt.Errorf("profile has nil mapping") } if m.ID == 0 { return fmt.Errorf("found mapping with reserved ID=0") } if mappings[m.ID] != nil { return fmt.Errorf("multiple mappings with same id: %d", m.ID) } mappings[m.ID] = m } functions := make(map[uint64]*Function, len(p.Function)) for _, f := range p.Function { if f == nil { return fmt.Errorf("profile has nil function") } if f.ID == 0 { return fmt.Errorf("found function with reserved ID=0") } if functions[f.ID] != nil { return fmt.Errorf("multiple functions with same id: %d", f.ID) } functions[f.ID] = f } locations := make(map[uint64]*Location, len(p.Location)) for _, l := range p.Location { if l == nil { return fmt.Errorf("profile has nil location") } if l.ID == 0 { return fmt.Errorf("found location with reserved id=0") } if locations[l.ID] != nil { return fmt.Errorf("multiple locations with same id: %d", l.ID) } locations[l.ID] = l if m := l.Mapping; m != nil { if m.ID == 0 || mappings[m.ID] != m { return fmt.Errorf("inconsistent mapping %p: %d", m, m.ID) } } for _, ln := range l.Line { f := ln.Function if f == nil { return fmt.Errorf("location id: %d has a line with nil function", l.ID) } if f.ID == 0 || functions[f.ID] != f { return fmt.Errorf("inconsistent function %p: %d", f, f.ID) } } } return nil } // Aggregate merges the locations in the profile into equivalence // classes preserving the request attributes. It also updates the // samples to point to the merged locations. func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, columnnumber, address bool) error { for _, m := range p.Mapping { m.HasInlineFrames = m.HasInlineFrames && inlineFrame m.HasFunctions = m.HasFunctions && function m.HasFilenames = m.HasFilenames && filename m.HasLineNumbers = m.HasLineNumbers && linenumber } // Aggregate functions if !function || !filename { for _, f := range p.Function { if !function { f.Name = "" f.SystemName = "" } if !filename { f.Filename = "" } } } // Aggregate locations if !inlineFrame || !address || !linenumber || !columnnumber { for _, l := range p.Location { if !inlineFrame && len(l.Line) > 1 { l.Line = l.Line[len(l.Line)-1:] } if !linenumber { for i := range l.Line { l.Line[i].Line = 0 l.Line[i].Column = 0 } } if !columnnumber { for i := range l.Line { l.Line[i].Column = 0 } } if !address { l.Address = 0 } } } return p.CheckValid() } // NumLabelUnits returns a map of numeric label keys to the units // associated with those keys and a map of those keys to any units // that were encountered but not used. // Unit for a given key is the first encountered unit for that key. If multiple // units are encountered for values paired with a particular key, then the first // unit encountered is used and all other units are returned in sorted order // in map of ignored units. // If no units are encountered for a particular key, the unit is then inferred // based on the key. func (p *Profile) NumLabelUnits() (map[string]string, map[string][]string) { numLabelUnits := map[string]string{} ignoredUnits := map[string]map[string]bool{} encounteredKeys := map[string]bool{} // Determine units based on numeric tags for each sample. for _, s := range p.Sample { for k := range s.NumLabel { encounteredKeys[k] = true for _, unit := range s.NumUnit[k] { if unit == "" { continue } if wantUnit, ok := numLabelUnits[k]; !ok { numLabelUnits[k] = unit } else if wantUnit != unit { if v, ok := ignoredUnits[k]; ok { v[unit] = true } else { ignoredUnits[k] = map[string]bool{unit: true} } } } } } // Infer units for keys without any units associated with // numeric tag values. for key := range encounteredKeys { unit := numLabelUnits[key] if unit == "" { switch key { case "alignment", "request": numLabelUnits[key] = "bytes" default: numLabelUnits[key] = key } } } // Copy ignored units into more readable format unitsIgnored := make(map[string][]string, len(ignoredUnits)) for key, values := range ignoredUnits { units := make([]string, len(values)) i := 0 for unit := range values { units[i] = unit i++ } sort.Strings(units) unitsIgnored[key] = units } return numLabelUnits, unitsIgnored } // String dumps a text representation of a profile. Intended mainly // for debugging purposes. func (p *Profile) String() string { ss := make([]string, 0, len(p.Comments)+len(p.Sample)+len(p.Mapping)+len(p.Location)) for _, c := range p.Comments { ss = append(ss, "Comment: "+c) } if url := p.DocURL; url != "" { ss = append(ss, fmt.Sprintf("Doc: %s", url)) } if pt := p.PeriodType; pt != nil { ss = append(ss, fmt.Sprintf("PeriodType: %s %s", pt.Type, pt.Unit)) } ss = append(ss, fmt.Sprintf("Period: %d", p.Period)) if p.TimeNanos != 0 { ss = append(ss, fmt.Sprintf("Time: %v", time.Unix(0, p.TimeNanos))) } if p.DurationNanos != 0 { ss = append(ss, fmt.Sprintf("Duration: %.4v", time.Duration(p.DurationNanos))) } ss = append(ss, "Samples:") var sh1 string for _, s := range p.SampleType { dflt := "" if s.Type == p.DefaultSampleType { dflt = "[dflt]" } sh1 = sh1 + fmt.Sprintf("%s/%s%s ", s.Type, s.Unit, dflt) } ss = append(ss, strings.TrimSpace(sh1)) for _, s := range p.Sample { ss = append(ss, s.string()) } ss = append(ss, "Locations") for _, l := range p.Location { ss = append(ss, l.string()) } ss = append(ss, "Mappings") for _, m := range p.Mapping { ss = append(ss, m.string()) } return strings.Join(ss, "\n") + "\n" } // string dumps a text representation of a mapping. Intended mainly // for debugging purposes. func (m *Mapping) string() string { bits := "" if m.HasFunctions { bits = bits + "[FN]" } if m.HasFilenames { bits = bits + "[FL]" } if m.HasLineNumbers { bits = bits + "[LN]" } if m.HasInlineFrames { bits = bits + "[IN]" } return fmt.Sprintf("%d: %#x/%#x/%#x %s %s %s", m.ID, m.Start, m.Limit, m.Offset, m.File, m.BuildID, bits) } // string dumps a text representation of a location. Intended mainly // for debugging purposes. func (l *Location) string() string { ss := []string{} locStr := fmt.Sprintf("%6d: %#x ", l.ID, l.Address) if m := l.Mapping; m != nil { locStr = locStr + fmt.Sprintf("M=%d ", m.ID) } if l.IsFolded { locStr = locStr + "[F] " } if len(l.Line) == 0 { ss = append(ss, locStr) } for li := range l.Line { lnStr := "??" if fn := l.Line[li].Function; fn != nil { lnStr = fmt.Sprintf("%s %s:%d:%d s=%d", fn.Name, fn.Filename, l.Line[li].Line, l.Line[li].Column, fn.StartLine) if fn.Name != fn.SystemName { lnStr = lnStr + "(" + fn.SystemName + ")" } } ss = append(ss, locStr+lnStr) // Do not print location details past the first line locStr = " " } return strings.Join(ss, "\n") } // string dumps a text representation of a sample. Intended mainly // for debugging purposes. func (s *Sample) string() string { ss := []string{} var sv string for _, v := range s.Value { sv = fmt.Sprintf("%s %10d", sv, v) } sv = sv + ": " for _, l := range s.Location { sv = sv + fmt.Sprintf("%d ", l.ID) } ss = append(ss, sv) const labelHeader = " " if len(s.Label) > 0 { ss = append(ss, labelHeader+labelsToString(s.Label)) } if len(s.NumLabel) > 0 { ss = append(ss, labelHeader+numLabelsToString(s.NumLabel, s.NumUnit)) } return strings.Join(ss, "\n") } // labelsToString returns a string representation of a // map representing labels. func labelsToString(labels map[string][]string) string { ls := []string{} for k, v := range labels { ls = append(ls, fmt.Sprintf("%s:%v", k, v)) } sort.Strings(ls) return strings.Join(ls, " ") } // numLabelsToString returns a string representation of a map // representing numeric labels. func numLabelsToString(numLabels map[string][]int64, numUnits map[string][]string) string { ls := []string{} for k, v := range numLabels { units := numUnits[k] var labelString string if len(units) == len(v) { values := make([]string, len(v)) for i, vv := range v { values[i] = fmt.Sprintf("%d %s", vv, units[i]) } labelString = fmt.Sprintf("%s:%v", k, values) } else { labelString = fmt.Sprintf("%s:%v", k, v) } ls = append(ls, labelString) } sort.Strings(ls) return strings.Join(ls, " ") } // SetLabel sets the specified key to the specified value for all samples in the // profile. func (p *Profile) SetLabel(key string, value []string) { for _, sample := range p.Sample { if sample.Label == nil { sample.Label = map[string][]string{key: value} } else { sample.Label[key] = value } } } // RemoveLabel removes all labels associated with the specified key for all // samples in the profile. func (p *Profile) RemoveLabel(key string) { for _, sample := range p.Sample { delete(sample.Label, key) } } // HasLabel returns true if a sample has a label with indicated key and value. func (s *Sample) HasLabel(key, value string) bool { for _, v := range s.Label[key] { if v == value { return true } } return false } // SetNumLabel sets the specified key to the specified value for all samples in the // profile. "unit" is a slice that describes the units that each corresponding member // of "values" is measured in (e.g. bytes or seconds). If there is no relevant // unit for a given value, that member of "unit" should be the empty string. // "unit" must either have the same length as "value", or be nil. func (p *Profile) SetNumLabel(key string, value []int64, unit []string) { for _, sample := range p.Sample { if sample.NumLabel == nil { sample.NumLabel = map[string][]int64{key: value} } else { sample.NumLabel[key] = value } if sample.NumUnit == nil { sample.NumUnit = map[string][]string{key: unit} } else { sample.NumUnit[key] = unit } } } // RemoveNumLabel removes all numerical labels associated with the specified key for all // samples in the profile. func (p *Profile) RemoveNumLabel(key string) { for _, sample := range p.Sample { delete(sample.NumLabel, key) delete(sample.NumUnit, key) } } // DiffBaseSample returns true if a sample belongs to the diff base and false // otherwise. func (s *Sample) DiffBaseSample() bool { return s.HasLabel("pprof::base", "true") } // Scale multiplies all sample values in a profile by a constant and keeps // only samples that have at least one non-zero value. func (p *Profile) Scale(ratio float64) { if ratio == 1 { return } ratios := make([]float64, len(p.SampleType)) for i := range p.SampleType { ratios[i] = ratio } p.ScaleN(ratios) } // ScaleN multiplies each sample values in a sample by a different amount // and keeps only samples that have at least one non-zero value. func (p *Profile) ScaleN(ratios []float64) error { if len(p.SampleType) != len(ratios) { return fmt.Errorf("mismatched scale ratios, got %d, want %d", len(ratios), len(p.SampleType)) } allOnes := true for _, r := range ratios { if r != 1 { allOnes = false break } } if allOnes { return nil } fillIdx := 0 for _, s := range p.Sample { keepSample := false for i, v := range s.Value { if ratios[i] != 1 { val := int64(math.Round(float64(v) * ratios[i])) s.Value[i] = val keepSample = keepSample || val != 0 } } if keepSample { p.Sample[fillIdx] = s fillIdx++ } } p.Sample = p.Sample[:fillIdx] return nil } // HasFunctions determines if all locations in this profile have // symbolized function information. func (p *Profile) HasFunctions() bool { for _, l := range p.Location { if l.Mapping != nil && !l.Mapping.HasFunctions { return false } } return true } // HasFileLines determines if all locations in this profile have // symbolized file and line number information. func (p *Profile) HasFileLines() bool { for _, l := range p.Location { if l.Mapping != nil && (!l.Mapping.HasFilenames || !l.Mapping.HasLineNumbers) { return false } } return true } // Unsymbolizable returns true if a mapping points to a binary for which // locations can't be symbolized in principle, at least now. Examples are // "[vdso]", "[vsyscall]" and some others, see the code. func (m *Mapping) Unsymbolizable() bool { name := filepath.Base(m.File) return strings.HasPrefix(name, "[") || strings.HasPrefix(name, "linux-vdso") || strings.HasPrefix(m.File, "/dev/dri/") || m.File == "//anon" } // Copy makes a fully independent copy of a profile. func (p *Profile) Copy() *Profile { pp := &Profile{} if err := unmarshal(serialize(p), pp); err != nil { panic(err) } if err := pp.postDecode(); err != nil { panic(err) } return pp }