mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-06-14 18:53:35 +00:00
rebase: update K8s packages to v0.32.1
Update K8s packages in go.mod to v0.32.1 Signed-off-by: Praveen M <m.praveen@ibm.com>
This commit is contained in:
80
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
Normal file
80
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
Normal file
@ -0,0 +1,80 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
	// ErrDevicesUnsupported is an error returned when a cgroup manager
	// is not configured to set device rules.
	ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")

	// ErrRootless is returned by [Manager.Apply] when there is an error
	// creating cgroup directory, and cgroup.Rootless is set. In general,
	// this error is to be ignored.
	ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)")

	// DevicesSetV1 and DevicesSetV2 are functions to set devices for
	// cgroup v1 and v2, respectively. Unless the
	// [github.com/opencontainers/runc/libcontainer/cgroups/devices]
	// package is imported, they are set to nil, so cgroup managers can't
	// manage devices.
	DevicesSetV1 func(path string, r *configs.Resources) error
	DevicesSetV2 func(path string, r *configs.Resources) error
)
|
||||
|
||||
// Manager is the set of operations a cgroup manager provides: creating and
// destroying a cgroup, placing processes into it, applying resource settings,
// freezing it, and querying its paths, members and statistics.
type Manager interface {
	// Apply creates a cgroup, if not yet created, and adds a process
	// with the specified pid into that cgroup. A special value of -1
	// can be used to merely create a cgroup.
	Apply(pid int) error

	// GetPids returns the PIDs of all processes inside the cgroup.
	GetPids() ([]int, error)

	// GetAllPids returns the PIDs of all processes inside the cgroup
	// and all its sub-cgroups.
	GetAllPids() ([]int, error)

	// GetStats returns cgroups statistics.
	GetStats() (*Stats, error)

	// Freeze sets the freezer cgroup to the specified state.
	Freeze(state configs.FreezerState) error

	// Destroy removes cgroup.
	Destroy() error

	// Path returns a cgroup path to the specified controller/subsystem.
	// For cgroupv2, the argument is unused and can be empty.
	Path(string) string

	// Set sets cgroup resources parameters/limits. If the argument is nil,
	// the resources specified during Manager creation (or the previous call
	// to Set) are used.
	Set(r *configs.Resources) error

	// GetPaths returns cgroup path(s) to save in a state file in order to
	// restore later.
	//
	// For cgroup v1, a key is cgroup subsystem name, and the value is the
	// path to the cgroup for this subsystem.
	//
	// For cgroup v2 unified hierarchy, a key is "", and the value is the
	// unified path.
	GetPaths() map[string]string

	// GetCgroups returns the cgroup data as configured.
	GetCgroups() (*configs.Cgroup, error)

	// GetFreezerState retrieves the current FreezerState of the cgroup.
	GetFreezerState() (configs.FreezerState, error)

	// Exists returns whether the cgroup path exists or not.
	Exists() bool

	// OOMKillCount reports OOM kill count for the cgroup.
	OOMKillCount() (uint64, error)
}
|
216
vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
Normal file
216
vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
Normal file
@ -0,0 +1,216 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// OpenFile opens a cgroup file in a given dir with given flags.
// It is supposed to be used for cgroup files only, and returns
// an error if the file is not a cgroup file.
//
// Arguments dir and file are joined together to form an absolute path
// to a file being opened.
//
// An empty dir is rejected with an error before any open is attempted.
func OpenFile(dir, file string, flags int) (*os.File, error) {
	if dir == "" {
		return nil, fmt.Errorf("no directory specified for %s", file)
	}
	return openFile(dir, file, flags)
}
|
||||
|
||||
// ReadFile reads data from a cgroup file in dir.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
func ReadFile(dir, file string) (string, error) {
|
||||
fd, err := OpenFile(dir, file, unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fd.Close()
|
||||
var buf bytes.Buffer
|
||||
|
||||
_, err = buf.ReadFrom(fd)
|
||||
return buf.String(), err
|
||||
}
|
||||
|
||||
// WriteFile writes data to a cgroup file in dir.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
func WriteFile(dir, file, data string) error {
|
||||
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fd.Close()
|
||||
if _, err := fd.WriteString(data); err != nil {
|
||||
// Having data in the error message helps in debugging.
|
||||
return fmt.Errorf("failed to write %q: %w", data, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteFileByLine is the same as WriteFile, except if data contains newlines,
|
||||
// it is written line by line.
|
||||
func WriteFileByLine(dir, file, data string) error {
|
||||
i := strings.Index(data, "\n")
|
||||
if i == -1 {
|
||||
return WriteFile(dir, file, data)
|
||||
}
|
||||
|
||||
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fd.Close()
|
||||
start := 0
|
||||
for {
|
||||
var line string
|
||||
if i == -1 {
|
||||
line = data[start:]
|
||||
} else {
|
||||
line = data[start : start+i+1]
|
||||
}
|
||||
_, err := fd.WriteString(line)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write %q: %w", line, err)
|
||||
}
|
||||
if i == -1 {
|
||||
break
|
||||
}
|
||||
start += i + 1
|
||||
i = strings.Index(data[start:], "\n")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
const (
	// cgroupfsDir is the directory that prepareOpenat2 opens a handle to.
	cgroupfsDir = "/sys/fs/cgroup"
	// cgroupfsPrefix is cgroupfsDir with a trailing slash; openFile strips
	// it from a path to obtain the path relative to cgroupRootHandle.
	cgroupfsPrefix = cgroupfsDir + "/"
)

var (
	// TestMode is set to true by unit tests that need "fake" cgroupfs.
	TestMode bool

	// cgroupRootHandle is the handle to cgroupfsDir that openat2 calls
	// are made relative to. It is set by prepareOpenat2 on success.
	cgroupRootHandle *os.File
	// prepOnce makes the body of prepareOpenat2 run a single time.
	prepOnce sync.Once
	// prepErr is the outcome of that single run, returned on every call.
	prepErr error
	// resolveFlags holds the RESOLVE_* flags openFile passes to openat2.
	resolveFlags uint64
)
|
||||
|
||||
func prepareOpenat2() error {
|
||||
prepOnce.Do(func() {
|
||||
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
|
||||
Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
|
||||
})
|
||||
if err != nil {
|
||||
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
||||
if err != unix.ENOSYS {
|
||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||
} else {
|
||||
logrus.Debug("openat2 not available, falling back to securejoin")
|
||||
}
|
||||
return
|
||||
}
|
||||
file := os.NewFile(uintptr(fd), cgroupfsDir)
|
||||
|
||||
var st unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
|
||||
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
|
||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||
return
|
||||
}
|
||||
|
||||
cgroupRootHandle = file
|
||||
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
|
||||
if st.Type == unix.CGROUP2_SUPER_MAGIC {
|
||||
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
|
||||
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
|
||||
}
|
||||
})
|
||||
|
||||
return prepErr
|
||||
}
|
||||
|
||||
// openFile opens the file named file inside dir with the given flags.
//
// When prepareOpenat2 succeeded and the joined path lies under
// cgroupfsPrefix, the file is opened with openat2 relative to
// cgroupRootHandle using resolveFlags. In every other case the open is
// delegated to openFallback.
//
// In TestMode, opening for writing also creates and truncates the file.
func openFile(dir, file string, flags int) (*os.File, error) {
	mode := os.FileMode(0)
	if TestMode && flags&os.O_WRONLY != 0 {
		// "emulate" cgroup fs for unit tests
		flags |= os.O_TRUNC | os.O_CREATE
		mode = 0o600
	}
	// The file name is cleaned before being joined with dir.
	path := path.Join(dir, utils.CleanPath(file))
	if prepareOpenat2() != nil {
		return openFallback(path, flags, mode)
	}
	// TrimPrefix returns its input unchanged when the prefix is absent, so
	// an equal length means path is not under cgroupfsPrefix.
	relPath := strings.TrimPrefix(path, cgroupfsPrefix)
	if len(relPath) == len(path) { // non-standard path, old system?
		return openFallback(path, flags, mode)
	}

	fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
		&unix.OpenHow{
			Resolve: resolveFlags,
			Flags:   uint64(flags) | unix.O_CLOEXEC,
			Mode:    uint64(mode),
		})
	if err != nil {
		err = &os.PathError{Op: "openat2", Path: path, Err: err}
		// Check if cgroupRootHandle is still opened to cgroupfsDir
		// (happens when this package is incorrectly used
		// across the chroot/pivot_root/mntns boundary, or
		// when /sys/fs/cgroup is remounted).
		//
		// TODO: if such usage will ever be common, amend this
		// to reopen cgroupRootHandle and retry openat2.
		fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
		defer closer()
		// A Readlink failure is ignored: fdDest is then empty, which
		// differs from cgroupfsDir and so takes the wrapping branch below.
		fdDest, _ := os.Readlink(fdPath)
		if fdDest != cgroupfsDir {
			// Wrap the error so it is clear that cgroupRootHandle
			// is opened to an unexpected/wrong directory.
			err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
				cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
		}
		return nil, err
	}

	return os.NewFile(uintptr(fd), path), nil
}
|
||||
|
||||
// errNotCgroupfs is the underlying error openAndCheck reports when the
// opened file is on neither a cgroup nor a cgroup2 filesystem.
var errNotCgroupfs = errors.New("not a cgroup file")

// openFallback is what openFile calls when it does not use openat2.
// Can be changed by unit tests.
var openFallback = openAndCheck
|
||||
|
||||
// openAndCheck is used when openat2(2) is not available. It checks the opened
|
||||
// file is on cgroupfs, returning an error otherwise.
|
||||
func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
|
||||
fd, err := os.OpenFile(path, flags, mode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if TestMode {
|
||||
return fd, nil
|
||||
}
|
||||
// Check this is a cgroupfs file.
|
||||
var st unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "statfs", Path: path, Err: err}
|
||||
}
|
||||
if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
|
||||
}
|
||||
|
||||
return fd, nil
|
||||
}
|
311
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
Normal file
311
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
Normal file
@ -0,0 +1,311 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// BlkioGroup is the handler for the "blkio" controller.
type BlkioGroup struct {
	// Names of the weight files to write to; both are filled in by
	// detectWeightFilenames on first use.
	weightFilename       string
	weightDeviceFilename string
}

// Name returns the controller name, "blkio".
func (s *BlkioGroup) Name() string {
	return "blkio"
}

// Apply delegates to apply with the cgroup path and pid; the resources
// argument is not used.
func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
	return apply(path, pid)
}
|
||||
|
||||
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
|
||||
s.detectWeightFilenames(path)
|
||||
if r.BlkioWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.BlkioLeafWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range r.BlkioWeightDevice {
|
||||
if wd.Weight != 0 {
|
||||
if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if wd.LeafWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
/*
|
||||
examples:
|
||||
|
||||
blkio.sectors
|
||||
8:0 6792
|
||||
|
||||
blkio.io_service_bytes
|
||||
8:0 Read 1282048
|
||||
8:0 Write 2195456
|
||||
8:0 Sync 2195456
|
||||
8:0 Async 1282048
|
||||
8:0 Total 3477504
|
||||
Total 3477504
|
||||
|
||||
blkio.io_serviced
|
||||
8:0 Read 124
|
||||
8:0 Write 104
|
||||
8:0 Sync 104
|
||||
8:0 Async 124
|
||||
8:0 Total 228
|
||||
Total 228
|
||||
|
||||
blkio.io_queued
|
||||
8:0 Read 0
|
||||
8:0 Write 0
|
||||
8:0 Sync 0
|
||||
8:0 Async 0
|
||||
8:0 Total 0
|
||||
Total 0
|
||||
*/
|
||||
|
||||
// splitBlkioStatLine reports whether r separates fields in a blkio stat
// line, i.e. whether it is a space or a colon. It is meant to be passed to
// strings.FieldsFunc.
func splitBlkioStatLine(r rune) bool {
	switch r {
	case ' ', ':':
		return true
	default:
		return false
	}
}
|
||||
|
||||
func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
f, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return blkioStats, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
// format: dev type amount
|
||||
fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
|
||||
if len(fields) < 3 {
|
||||
if len(fields) == 2 && fields[0] == "Total" {
|
||||
// skip total line
|
||||
continue
|
||||
} else {
|
||||
return nil, malformedLine(dir, file, sc.Text())
|
||||
}
|
||||
}
|
||||
|
||||
v, err := strconv.ParseUint(fields[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
major := v
|
||||
|
||||
v, err = strconv.ParseUint(fields[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
minor := v
|
||||
|
||||
op := ""
|
||||
valueField := 2
|
||||
if len(fields) == 4 {
|
||||
op = fields[2]
|
||||
valueField = 3
|
||||
}
|
||||
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
|
||||
return blkioStats, nil
|
||||
}
|
||||
|
||||
// GetStats fills stats.BlkioStats from the blkio stat files found in path.
//
// Several groups of files are tried in a fixed order: bfq debug, bfq, cfq,
// throttle recursive, and throttle base. Within a group the files are read
// one by one with getBlkioStat and stored into the matching field of
// stats.BlkioStats. If the first file of a group fails or yields a nil
// result, the next group is tried. If a later file of a group fails or
// yields nil, the function returns at once with getBlkioStat's error (nil
// when the result was merely nil). As soon as every file of one group has
// been read, the function returns nil.
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
	// blkioStatInfo pairs a stat file name with the destination it fills.
	type blkioStatInfo struct {
		filename            string
		blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
	}
	bfqDebugStats := []blkioStatInfo{
		{
			filename:            "blkio.bfq.sectors_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
		},
		{
			filename:            "blkio.bfq.io_service_time_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
		},
		{
			filename:            "blkio.bfq.io_wait_time_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
		},
		{
			filename:            "blkio.bfq.io_merged_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
		},
		{
			filename:            "blkio.bfq.io_queued_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
		},
		{
			filename:            "blkio.bfq.time_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
		},
		{
			filename:            "blkio.bfq.io_serviced_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
		},
		{
			filename:            "blkio.bfq.io_service_bytes_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
		},
	}
	bfqStats := []blkioStatInfo{
		{
			filename:            "blkio.bfq.io_serviced_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
		},
		{
			filename:            "blkio.bfq.io_service_bytes_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
		},
	}
	cfqStats := []blkioStatInfo{
		{
			filename:            "blkio.sectors_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
		},
		{
			filename:            "blkio.io_service_time_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
		},
		{
			filename:            "blkio.io_wait_time_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
		},
		{
			filename:            "blkio.io_merged_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
		},
		{
			filename:            "blkio.io_queued_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
		},
		{
			filename:            "blkio.time_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
		},
		{
			filename:            "blkio.io_serviced_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
		},
		{
			filename:            "blkio.io_service_bytes_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
		},
	}
	throttleRecursiveStats := []blkioStatInfo{
		{
			filename:            "blkio.throttle.io_serviced_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
		},
		{
			filename:            "blkio.throttle.io_service_bytes_recursive",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
		},
	}
	// The non-recursive throttle files are stored into the same
	// *Recursive destination fields as the groups above.
	baseStats := []blkioStatInfo{
		{
			filename:            "blkio.throttle.io_serviced",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
		},
		{
			filename:            "blkio.throttle.io_service_bytes",
			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
		},
	}
	// Groups are tried in this order; the first group whose files can all
	// be read ends the search.
	orderedStats := [][]blkioStatInfo{
		bfqDebugStats,
		bfqStats,
		cfqStats,
		throttleRecursiveStats,
		baseStats,
	}

	var blkioStats []cgroups.BlkioStatEntry
	var err error

	for _, statGroup := range orderedStats {
		for i, statInfo := range statGroup {
			if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil {
				// if error occurs on first file, move to next group
				if i == 0 {
					break
				}
				// Past the first file, give up; err is nil here when
				// the result was nil without an error.
				return err
			}
			*statInfo.blkioStatEntriesPtr = blkioStats
			// finish if all stats are gathered
			if i == len(statGroup)-1 {
				return nil
			}
		}
	}
	return nil
}
|
||||
|
||||
func (s *BlkioGroup) detectWeightFilenames(path string) {
|
||||
if s.weightFilename != "" {
|
||||
// Already detected.
|
||||
return
|
||||
}
|
||||
if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
|
||||
s.weightFilename = "blkio.weight"
|
||||
s.weightDeviceFilename = "blkio.weight_device"
|
||||
} else {
|
||||
s.weightFilename = "blkio.bfq.weight"
|
||||
s.weightDeviceFilename = "blkio.bfq.weight_device"
|
||||
}
|
||||
}
|
182
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
Normal file
182
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
Normal file
@ -0,0 +1,182 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// CpuGroup is the handler for the "cpu" controller. It carries no state.
type CpuGroup struct{}

// Name returns the controller name, "cpu".
func (s *CpuGroup) Name() string {
	return "cpu"
}
|
||||
|
||||
// Apply creates the cgroup directory path if needed, applies the real-time
// scheduling settings from r, and then writes pid into the cgroup via
// cgroups.WriteCgroupProc.
func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
	if err := os.MkdirAll(path, 0o755); err != nil {
		return err
	}
	// We should set the real-time group scheduling settings before moving
	// in the process because if the process is already in SCHED_RR mode
	// and no RT bandwidth is set, adding it will fail.
	if err := s.SetRtSched(path, r); err != nil {
		return err
	}
	// Since we are not using apply(), we need to place the pid
	// into the procs file.
	return cgroups.WriteCgroupProc(path, pid)
}
|
||||
|
||||
// SetRtSched writes the real-time scheduling settings from r to the cgroup
// at path: cpu.rt_period_us when r.CpuRtPeriod is non-zero and
// cpu.rt_runtime_us when r.CpuRtRuntime is non-zero.
//
// The period is written first. If that write fails with EINVAL and a
// runtime is also being set, the failure is tolerated and the period is
// written again after the runtime.
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
	// period stays non-empty only while a period write is still pending.
	var period string
	if r.CpuRtPeriod != 0 {
		period = strconv.FormatUint(r.CpuRtPeriod, 10)
		if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil {
			// The values of cpu.rt_period_us and cpu.rt_runtime_us
			// are inter-dependent and need to be set in a proper order.
			// If the kernel rejects the new period value with EINVAL
			// and the new runtime value is also being set, let's
			// ignore the error for now and retry later.
			if !errors.Is(err, unix.EINVAL) || r.CpuRtRuntime == 0 {
				return err
			}
		} else {
			// Written successfully; no retry needed.
			period = ""
		}
	}
	if r.CpuRtRuntime != 0 {
		if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
			return err
		}
		// Retry the period write that was rejected above.
		if period != "" {
			if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil {
				return err
			}
		}
	}
	return nil
}
|
||||
|
||||
// Set writes the CPU settings from r to the cgroup at path.
//
// In order, it handles cpu.shares (written and then read back to detect
// clamping by the kernel), cpu.cfs_period_us, cpu.cfs_burst_us,
// cpu.cfs_quota_us, cpu.idle, and finally the real-time settings through
// SetRtSched. Settings left at their zero value (or nil for pointer fields)
// are skipped.
//
// A period or burst write rejected with EINVAL is tolerated when a quota is
// also being set; it is then written again after the quota. A missing
// cpu.cfs_burst_us file (ENOENT) is ignored.
func (s *CpuGroup) Set(path string, r *configs.Resources) error {
	if r.CpuShares != 0 {
		shares := r.CpuShares
		if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
			return err
		}
		// read it back
		sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
		if err != nil {
			return err
		}
		// ... and check
		if shares > sharesRead {
			return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead)
		} else if shares < sharesRead {
			return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
		}
	}

	// period stays non-empty only while a period write is still pending.
	var period string
	if r.CpuPeriod != 0 {
		period = strconv.FormatUint(r.CpuPeriod, 10)
		if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
			// Sometimes when the period to be set is smaller
			// than the current one, it is rejected by the kernel
			// (EINVAL) as old_quota/new_period exceeds the parent
			// cgroup quota limit. If this happens and the quota is
			// going to be set, ignore the error for now and retry
			// after setting the quota.
			if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
				return err
			}
		} else {
			period = ""
		}
	}

	// burst stays non-empty only while a burst write is still pending.
	var burst string
	if r.CpuBurst != nil {
		burst = strconv.FormatUint(*r.CpuBurst, 10)
		if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil {
			if errors.Is(err, unix.ENOENT) {
				// If CPU burst knob is not available (e.g.
				// older kernel), ignore it.
				burst = ""
			} else {
				// Sometimes when the burst to be set is larger
				// than the current one, it is rejected by the kernel
				// (EINVAL) as old_quota/new_burst exceeds the parent
				// cgroup quota limit. If this happens and the quota is
				// going to be set, ignore the error for now and retry
				// after setting the quota.
				if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
					return err
				}
			}
		} else {
			burst = ""
		}
	}
	if r.CpuQuota != 0 {
		if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
			return err
		}
		// Retry whichever of the period/burst writes was rejected above.
		if period != "" {
			if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
				return err
			}
		}
		if burst != "" {
			if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil {
				return err
			}
		}
	}

	if r.CPUIdle != nil {
		idle := strconv.FormatInt(*r.CPUIdle, 10)
		if err := cgroups.WriteFile(path, "cpu.idle", idle); err != nil {
			return err
		}
	}

	return s.SetRtSched(path, r)
}
|
||||
|
||||
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
const file = "cpu.stat"
|
||||
f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
switch t {
|
||||
case "nr_periods":
|
||||
stats.CpuStats.ThrottlingData.Periods = v
|
||||
|
||||
case "nr_throttled":
|
||||
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||
|
||||
case "throttled_time":
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
166
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
Normal file
166
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
Normal file
@ -0,0 +1,166 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
	// Names of the cpuacct files read by this file.
	cgroupCpuacctStat     = "cpuacct.stat"
	cgroupCpuacctUsageAll = "cpuacct.usage_all"

	nanosecondsInSecond = 1000000000

	// Layout of a cpuacct.usage_all data line once split on spaces:
	// indexes of the user and kernel columns, and the number of columns
	// a line must have to be used.
	userModeColumn              = 1
	kernelModeColumn            = 2
	cuacctUsageAllColumnsNumber = 3

	// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
	// on Linux it's a constant which is safe to be hard coded,
	// so we can avoid using cgo here. For details, see:
	// https://github.com/containerd/cgroups/pull/12
	clockTicks uint64 = 100
)
|
||||
|
||||
// CpuacctGroup is the handler for the "cpuacct" controller. It carries no
// state.
type CpuacctGroup struct{}

// Name returns the controller name, "cpuacct".
func (s *CpuacctGroup) Name() string {
	return "cpuacct"
}

// Apply delegates to apply with the cgroup path and pid; the resources
// argument is not used.
func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
	return apply(path, pid)
}

// Set does nothing and always returns nil.
func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
	return nil
}
|
||||
|
||||
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
percpuUsage, err := getPercpuUsage(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
||||
return nil
|
||||
}
|
||||
|
||||
// Returns user and kernel usage breakdown in nanoseconds.
|
||||
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
||||
var userModeUsage, kernelModeUsage uint64
|
||||
const (
|
||||
userField = "user"
|
||||
systemField = "system"
|
||||
file = cgroupCpuacctStat
|
||||
)
|
||||
|
||||
// Expected format:
|
||||
// user <usage in ticks>
|
||||
// system <usage in ticks>
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
fields := strings.Fields(data)
|
||||
if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
|
||||
return 0, 0, malformedLine(path, file, data)
|
||||
}
|
||||
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
||||
}
|
||||
|
||||
func getPercpuUsage(path string) ([]uint64, error) {
|
||||
const file = "cpuacct.usage_percpu"
|
||||
percpuUsage := []uint64{}
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return percpuUsage, err
|
||||
}
|
||||
// TODO: use strings.SplitN instead.
|
||||
for _, value := range strings.Fields(data) {
|
||||
value, err := strconv.ParseUint(value, 10, 64)
|
||||
if err != nil {
|
||||
return percpuUsage, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
percpuUsage = append(percpuUsage, value)
|
||||
}
|
||||
return percpuUsage, nil
|
||||
}
|
||||
|
||||
func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
|
||||
usageKernelMode := []uint64{}
|
||||
usageUserMode := []uint64{}
|
||||
const file = cgroupCpuacctUsageAll
|
||||
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
} else if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
scanner := bufio.NewScanner(fd)
|
||||
scanner.Scan() // skipping header line
|
||||
|
||||
for scanner.Scan() {
|
||||
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)
|
||||
if len(lineFields) != cuacctUsageAllColumnsNumber {
|
||||
continue
|
||||
}
|
||||
|
||||
usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageKernelMode = append(usageKernelMode, usageInKernelMode)
|
||||
|
||||
usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageUserMode = append(usageUserMode, usageInUserMode)
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
}
|
245
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
Normal file
245
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
Normal file
@ -0,0 +1,245 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// CpusetGroup implements the methods below for the "cpuset" cgroup v1
// controller. It carries no state.
type CpusetGroup struct{}

// Name returns the controller name, "cpuset".
func (s *CpusetGroup) Name() string {
	return "cpuset"
}
|
||||
|
||||
func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
return s.ApplyDir(path, r, pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.CpusetCpus != "" {
|
||||
if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.CpusetMems != "" {
|
||||
if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCpusetStat(path string, file string) ([]uint16, error) {
|
||||
var extracted []uint16
|
||||
fileContent, err := fscommon.GetCgroupParamString(path, file)
|
||||
if err != nil {
|
||||
return extracted, err
|
||||
}
|
||||
if len(fileContent) == 0 {
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
|
||||
}
|
||||
|
||||
for _, s := range strings.Split(fileContent, ",") {
|
||||
sp := strings.SplitN(s, "-", 3)
|
||||
switch len(sp) {
|
||||
case 3:
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
|
||||
case 2:
|
||||
min, err := strconv.ParseUint(sp[0], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
max, err := strconv.ParseUint(sp[1], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if min > max {
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
|
||||
}
|
||||
for i := min; i <= max; i++ {
|
||||
extracted = append(extracted, uint16(i))
|
||||
}
|
||||
case 1:
|
||||
value, err := strconv.ParseUint(s, 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
extracted = append(extracted, uint16(value))
|
||||
}
|
||||
}
|
||||
|
||||
return extracted, nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
var err error
|
||||
|
||||
stats.CPUSetStats.CPUs, err = getCpusetStat(path, "cpuset.cpus")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.cpu_exclusive")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.Mems, err = getCpusetStat(path, "cpuset.mems")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_hardwall")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_exclusive")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_migrate")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_page")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_slab")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_pressure")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, "cpuset.sched_load_balance")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, "cpuset.sched_relax_domain_level")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error {
|
||||
// This might happen if we have no cpuset cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
// 'ensureParent' start with parent because we don't want to
|
||||
// explicitly inherit from parent, it could conflict with
|
||||
// 'cpuset.cpu_exclusive'.
|
||||
if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
// We didn't inherit cpuset configs from parent, but we have
|
||||
// to ensure cpuset configs are set before moving task into the
|
||||
// cgroup.
|
||||
// The logic is, if user specified cpuset configs, use these
|
||||
// specified configs, otherwise, inherit from parent. This makes
|
||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||
// keep backward compatibility.
|
||||
if err := s.ensureCpusAndMems(dir, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// Since we are not using apply(), we need to place the pid
|
||||
// into the procs file.
|
||||
return cgroups.WriteCgroupProc(dir, pid)
|
||||
}
|
||||
|
||||
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
|
||||
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
|
||||
return
|
||||
}
|
||||
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
|
||||
return
|
||||
}
|
||||
return cpus, mems, nil
|
||||
}
|
||||
|
||||
// cpusetEnsureParent makes sure that the parent directories of current
|
||||
// are created and populated with the proper cpus and mems files copied
|
||||
// from their respective parent. It does that recursively, starting from
|
||||
// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
|
||||
func cpusetEnsureParent(current string) error {
|
||||
var st unix.Statfs_t
|
||||
|
||||
parent := filepath.Dir(current)
|
||||
err := unix.Statfs(parent, &st)
|
||||
if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return nil
|
||||
}
|
||||
// Treat non-existing directory as cgroupfs as it will be created,
|
||||
// and the root cpuset directory obviously exists.
|
||||
if err != nil && err != unix.ENOENT {
|
||||
return &os.PathError{Op: "statfs", Path: parent, Err: err}
|
||||
}
|
||||
|
||||
if err := cpusetEnsureParent(parent); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
return cpusetCopyIfNeeded(current, parent)
|
||||
}
|
||||
|
||||
// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||
// directory to the current directory if the file's contents are 0
|
||||
func cpusetCopyIfNeeded(current, parent string) error {
|
||||
currentCpus, currentMems, err := getCpusetSubsystemSettings(current)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
parentCpus, parentMems, err := getCpusetSubsystemSettings(parent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if isEmptyCpuset(currentCpus) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if isEmptyCpuset(currentMems) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// isEmptyCpuset reports whether str, the raw content of a cpuset file,
// holds no value: it is either the empty string or a lone newline.
func isEmptyCpuset(str string) bool {
	switch str {
	case "", "\n":
		return true
	}
	return false
}
|
||||
|
||||
func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error {
|
||||
if err := s.Set(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
return cpusetCopyIfNeeded(path, filepath.Dir(path))
|
||||
}
|
39
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
Normal file
39
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// DevicesGroup implements the methods below for the "devices" cgroup v1
// controller. It carries no state.
type DevicesGroup struct{}

// Name returns the controller name, "devices".
func (s *DevicesGroup) Name() string {
	return "devices"
}
|
||||
|
||||
func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
if r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
if path == "" {
|
||||
// Return error here, since devices cgroup
|
||||
// is a hard requirement for container's security.
|
||||
return errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
|
||||
if cgroups.DevicesSetV1 == nil {
|
||||
if len(r.Devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
return cgroups.ErrDevicesUnsupported
|
||||
}
|
||||
return cgroups.DevicesSetV1(path, r)
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
Normal file
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
// parseError is a package-local alias for fscommon.ParseError, used by the
// parsers in this package to report the path and file a failure relates to.
type parseError = fscommon.ParseError
|
||||
|
||||
// malformedLine is used by all cgroupfs file parsers that expect a line
|
||||
// in a particular format but get some garbage instead.
|
||||
func malformedLine(path, file, line string) error {
|
||||
return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
|
||||
}
|
158
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
Normal file
158
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
Normal file
@ -0,0 +1,158 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// FreezerGroup implements the methods below for the "freezer" cgroup v1
// controller. It carries no state.
type FreezerGroup struct{}

// Name returns the controller name, "freezer".
func (s *FreezerGroup) Name() string {
	return "freezer"
}
|
||||
|
||||
func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
|
||||
switch r.Freezer {
|
||||
case configs.Frozen:
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
// Freezing failed, and it is bad and dangerous
|
||||
// to leave the cgroup in FROZEN or FREEZING
|
||||
// state, so (try to) thaw it back.
|
||||
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
}
|
||||
}()
|
||||
|
||||
// As per older kernel docs (freezer-subsystem.txt before
|
||||
// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
|
||||
// userspace should either retry or thaw. While current
|
||||
// kernel cgroup v1 docs no longer mention a need to retry,
|
||||
// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
|
||||
// freeze a cgroup v1 while new processes keep appearing in it
|
||||
// (either via fork/clone or by writing new PIDs to
|
||||
// cgroup.procs).
|
||||
//
|
||||
// The numbers below are empirically chosen to have a decent
|
||||
// chance to succeed in various scenarios ("runc pause/unpause
|
||||
// with parallel runc exec" and "bare freeze/unfreeze on a very
|
||||
// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
|
||||
//
|
||||
// Adding any amount of sleep in between retries did not
|
||||
// increase the chances of successful freeze in "pause/unpause
|
||||
// with parallel exec" reproducer. OTOH, adding an occasional
|
||||
// sleep helped for the case where the system is extremely slow
|
||||
// (CentOS 7 VM on GHA CI).
|
||||
//
|
||||
// Alas, this is still a game of chances, since the real fix
|
||||
// belong to the kernel (cgroup v2 do not have this bug).
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
if i%50 == 49 {
|
||||
// Occasional thaw and sleep improves
|
||||
// the chances to succeed in freezing
|
||||
// in case new processes keep appearing
|
||||
// in the cgroup.
|
||||
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if i%25 == 24 {
|
||||
// Occasional short sleep before reading
|
||||
// the state back also improves the chances to
|
||||
// succeed in freezing in case of a very slow
|
||||
// system.
|
||||
time.Sleep(10 * time.Microsecond)
|
||||
}
|
||||
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
state = strings.TrimSpace(state)
|
||||
switch state {
|
||||
case "FREEZING":
|
||||
continue
|
||||
case string(configs.Frozen):
|
||||
if i > 1 {
|
||||
logrus.Debugf("frozen after %d retries", i)
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
// should never happen
|
||||
return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
|
||||
}
|
||||
}
|
||||
// Despite our best efforts, it got stuck in FREEZING.
|
||||
return errors.New("unable to freeze")
|
||||
case configs.Thawed:
|
||||
return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
|
||||
}
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
|
||||
for {
|
||||
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as
|
||||
// being in an "undefined" state.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
switch strings.TrimSpace(state) {
|
||||
case "THAWED":
|
||||
return configs.Thawed, nil
|
||||
case "FROZEN":
|
||||
// Find out whether the cgroup is frozen directly,
|
||||
// or indirectly via an ancestor.
|
||||
self, err := cgroups.ReadFile(path, "freezer.self_freezing")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat
|
||||
// it as being frozen.
|
||||
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Frozen, err
|
||||
}
|
||||
switch self {
|
||||
case "0\n":
|
||||
return configs.Thawed, nil
|
||||
case "1\n":
|
||||
return configs.Frozen, nil
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
|
||||
}
|
||||
case "FREEZING":
|
||||
// Make sure we get a stable freezer state, so retry if the cgroup
|
||||
// is still undergoing freezing. This should be a temporary delay.
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
|
||||
}
|
||||
}
|
||||
}
|
266
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
Normal file
266
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
Normal file
@ -0,0 +1,266 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var subsystems = []subsystem{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&RdmaGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
&NameGroup{GroupName: "misc", Join: true},
|
||||
}
|
||||
|
||||
// errSubsystemDoesNotExist is returned when a controller is required but no
// cgroup path is available for it.
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
func init() {
|
||||
// If using cgroups-hybrid mode then add a "" controller indicating
|
||||
// it should join the cgroups v2.
|
||||
if cgroups.IsCgroup2HybridMode() {
|
||||
subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
|
||||
}
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// GetStats fills in the stats for the subsystem.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Apply creates and joins a cgroup, adding pid into it. Some
|
||||
// subsystems use resources to pre-configure the cgroup parents
|
||||
// before creating or joining it.
|
||||
Apply(path string, r *configs.Resources, pid int) error
|
||||
// Set sets the cgroup resources.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
type Manager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
func NewManager(cg *configs.Cgroup, paths map[string]string) (*Manager, error) {
|
||||
// Some v1 controllers (cpu, cpuset, and devices) expect
|
||||
// cgroups.Resources to not be nil in Apply.
|
||||
if cg.Resources == nil {
|
||||
return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
|
||||
}
|
||||
if cg.Resources.Unified != nil {
|
||||
return nil, cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
if paths == nil {
|
||||
var err error
|
||||
paths, err = initPaths(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &Manager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isIgnorableError returns whether err is a permission error (in the loose
|
||||
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||
// the normal EPERM.
|
||||
func isIgnorableError(rootless bool, err error) bool {
|
||||
// We do not ignore errors if we are root.
|
||||
if !rootless {
|
||||
return false
|
||||
}
|
||||
// Is it an ordinary EPERM?
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
return true
|
||||
}
|
||||
// Handle some specific syscall errors.
|
||||
var errno unix.Errno
|
||||
if errors.As(err, &errno) {
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) (retErr error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
c := m.cgroups
|
||||
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
p, ok := m.paths[name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := sys.Apply(p, c.Resources, pid); err != nil {
|
||||
// In the case of rootless (including euid=0 in userns), where an
|
||||
// explicit cgroup path hasn't been set, we don't bail on error in
|
||||
// case of permission problems here, but do delete the path from
|
||||
// the m.paths map, since it is either non-existent and could not
|
||||
// be created, or the pid could not be added to it.
|
||||
//
|
||||
// Cases where limits for the subsystem have been set are handled
|
||||
// later by Set, which fails with a friendly error (see
|
||||
// if path == "" in Set).
|
||||
if isIgnorableError(c.Rootless, err) && c.Path == "" {
|
||||
retErr = cgroups.ErrRootless
|
||||
delete(m.paths, name)
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
return retErr
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return cgroups.RemovePaths(m.paths)
|
||||
}
|
||||
|
||||
func (m *Manager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
// When rootless is true, errors from the device subsystem
|
||||
// are ignored, as it is really not expected to work.
|
||||
if m.cgroups.Rootless && sys.Name() == "devices" && !errors.Is(err, cgroups.ErrDevicesUnsupported) {
|
||||
continue
|
||||
}
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if path == "" {
|
||||
// We never created a path for this cgroup, so we cannot set
|
||||
// limits for it (though we have already tried at this point).
|
||||
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
path := m.Path("freezer")
|
||||
if path == "" {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
dir := m.Path("freezer")
|
||||
// If the container doesn't have the freezer cgroup, say it's undefined.
|
||||
if dir == "" {
|
||||
return configs.Undefined, nil
|
||||
}
|
||||
freezer := &FreezerGroup{}
|
||||
return freezer.GetState(dir)
|
||||
}
|
||||
|
||||
func (m *Manager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
||||
|
||||
func OOMKillCount(path string) (uint64, error) {
|
||||
return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
|
||||
}
|
||||
|
||||
func (m *Manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.Path("memory"))
|
||||
// Ignore ENOENT when rootless as it couldn't create cgroup.
|
||||
if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return c, err
|
||||
}
|
84
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
Normal file
84
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
Normal file
@ -0,0 +1,84 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// HugetlbGroup implements the methods below for the "hugetlb" cgroup v1
// controller. It carries no state.
type HugetlbGroup struct{}

// Name returns the controller name, "hugetlb".
func (s *HugetlbGroup) Name() string {
	return "hugetlb"
}
|
||||
|
||||
func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
|
||||
const suffix = ".limit_in_bytes"
|
||||
skipRsvd := false
|
||||
|
||||
for _, hugetlb := range r.HugetlbLimit {
|
||||
prefix := "hugetlb." + hugetlb.Pagesize
|
||||
val := strconv.FormatUint(hugetlb.Limit, 10)
|
||||
if err := cgroups.WriteFile(path, prefix+suffix, val); err != nil {
|
||||
return err
|
||||
}
|
||||
if skipRsvd {
|
||||
continue
|
||||
}
|
||||
if err := cgroups.WriteFile(path, prefix+".rsvd"+suffix, val); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
skipRsvd = true
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
rsvd := ".rsvd"
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
again:
|
||||
prefix := "hugetlb." + pageSize + rsvd
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, prefix+".usage_in_bytes")
|
||||
if err != nil {
|
||||
if rsvd != "" && errors.Is(err, os.ErrNotExist) {
|
||||
rsvd = ""
|
||||
goto again
|
||||
}
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, prefix+".max_usage_in_bytes")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.MaxUsage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, prefix+".failcnt")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pageSize] = hugetlbStats
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
357
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
Normal file
357
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
Normal file
@ -0,0 +1,357 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// Names of the memory controller files used by the helpers in this file.
const (
	// cgroupMemorySwapLimit is the file the swap limit is written to.
	cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
	// cgroupMemoryLimit is the file the memory limit is written to.
	cgroupMemoryLimit = "memory.limit_in_bytes"
	// cgroupMemoryUsage is read to report the current usage.
	cgroupMemoryUsage = "memory.usage_in_bytes"
	// cgroupMemoryMaxUsage is read to report the peak usage.
	cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
)
|
||||
|
||||
// MemoryGroup implements the methods below for the "memory" cgroup v1
// controller. It carries no state.
type MemoryGroup struct{}

// Name returns the controller name, "memory".
func (s *MemoryGroup) Name() string {
	return "memory"
}
|
||||
|
||||
func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func setMemory(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
|
||||
if !errors.Is(err, unix.EBUSY) {
|
||||
return err
|
||||
}
|
||||
|
||||
// EBUSY means the kernel can't set new limit as it's too low
|
||||
// (lower than the current usage). Return more specific error.
|
||||
usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
|
||||
}
|
||||
|
||||
func setSwap(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, r *configs.Resources) error {
|
||||
// If the memory update is set to -1 and the swap is not explicitly
|
||||
// set, we should also set swap to -1, it means unlimited memory.
|
||||
if r.Memory == -1 && r.MemorySwap == 0 {
|
||||
// Only set swap if it's enabled in kernel
|
||||
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||
r.MemorySwap = -1
|
||||
}
|
||||
}
|
||||
|
||||
// When memory and swap memory are both set, we need to handle the cases
|
||||
// for updating container.
|
||||
if r.Memory != 0 && r.MemorySwap != 0 {
|
||||
curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// When update memory limit, we should adapt the write sequence
|
||||
// for memory and swap memory, so it won't fail because the new
|
||||
// value and the old value don't fit kernel's validation.
|
||||
if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) {
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
|
||||
if err := setMemoryAndSwap(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// ignore KernelMemory and KernelMemoryTCP
|
||||
|
||||
if r.MemoryReservation != 0 {
|
||||
if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.OomKillDisable {
|
||||
if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if *r.MemorySwappiness <= 100 {
|
||||
if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
const file = "memory.stat"
|
||||
statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
|
||||
memoryUsage, err := getMemoryData(path, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryData(path, "memsw")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
stats.MemoryStats.SwapOnlyUsage = cgroups.MemoryData{
|
||||
Usage: swapUsage.Usage - memoryUsage.Usage,
|
||||
Failcnt: swapUsage.Failcnt - memoryUsage.Failcnt,
|
||||
}
|
||||
kernelUsage, err := getMemoryData(path, "kmem")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelUsage = kernelUsage
|
||||
kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if value == 1 {
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
}
|
||||
|
||||
pagesByNUMA, err := getPageUsageByNUMA(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.PageUsageByNUMA = pagesByNUMA
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = "memory." + name
|
||||
}
|
||||
var (
|
||||
usage = moduleName + ".usage_in_bytes"
|
||||
maxUsage = moduleName + ".max_usage_in_bytes"
|
||||
failcnt = moduleName + ".failcnt"
|
||||
limit = moduleName + ".limit_in_bytes"
|
||||
)
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if name != "" && os.IsNotExist(err) {
|
||||
// Ignore ENOENT as swap and kmem controllers
|
||||
// are optional in the kernel.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Usage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if name == "kmem" && os.IsNotExist(err) {
|
||||
// Ignore ENOENT as kmem.limit_in_bytes has
|
||||
// been removed in newer kernels.
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
|
||||
const (
|
||||
maxColumns = math.MaxUint8 + 1
|
||||
file = "memory.numa_stat"
|
||||
)
|
||||
stats := cgroups.PageUsageByNUMA{}
|
||||
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return stats, nil
|
||||
} else if err != nil {
|
||||
return stats, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
|
||||
// and it looks like this:
|
||||
//
|
||||
// total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for scanner.Scan() {
|
||||
var field *cgroups.PageStats
|
||||
|
||||
line := scanner.Text()
|
||||
columns := strings.SplitN(line, " ", maxColumns)
|
||||
for i, column := range columns {
|
||||
key, val, ok := strings.Cut(column, "=")
|
||||
// Some custom kernels have non-standard fields, like
|
||||
// numa_locality 0 0 0 0 0 0 0 0 0 0
|
||||
// numa_exectime 0
|
||||
if !ok {
|
||||
if i == 0 {
|
||||
// Ignore/skip those.
|
||||
break
|
||||
} else {
|
||||
// The first column was already validated,
|
||||
// so be strict to the rest.
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
}
|
||||
if i == 0 { // First column: key is name, val is total.
|
||||
field = getNUMAField(&stats, key)
|
||||
if field == nil { // unknown field (new kernel?)
|
||||
break
|
||||
}
|
||||
field.Total, err = strconv.ParseUint(val, 0, 64)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
field.Nodes = map[uint8]uint64{}
|
||||
} else { // Subsequent columns: key is N<id>, val is usage.
|
||||
if len(key) < 2 || key[0] != 'N' {
|
||||
// This is definitely an error.
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
|
||||
n, err := strconv.ParseUint(key[1:], 10, 8)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
usage, err := strconv.ParseUint(val, 10, 64)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
field.Nodes[uint8(n)] = usage
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats {
|
||||
switch name {
|
||||
case "total":
|
||||
return &stats.Total
|
||||
case "file":
|
||||
return &stats.File
|
||||
case "anon":
|
||||
return &stats.Anon
|
||||
case "unevictable":
|
||||
return &stats.Unevictable
|
||||
case "hierarchical_total":
|
||||
return &stats.Hierarchical.Total
|
||||
case "hierarchical_file":
|
||||
return &stats.Hierarchical.File
|
||||
case "hierarchical_anon":
|
||||
return &stats.Hierarchical.Anon
|
||||
case "hierarchical_unevictable":
|
||||
return &stats.Hierarchical.Unevictable
|
||||
}
|
||||
return nil
|
||||
}
|
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
Normal file
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NameGroup struct {
|
||||
GroupName string
|
||||
Join bool
|
||||
}
|
||||
|
||||
func (s *NameGroup) Name() string {
|
||||
return s.GroupName
|
||||
}
|
||||
|
||||
func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
if s.Join {
|
||||
// Ignore errors if the named cgroup does not exist.
|
||||
_ = apply(path, pid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
32
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
Normal file
32
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetClsGroup struct{}
|
||||
|
||||
func (s *NetClsGroup) Name() string {
|
||||
return "net_cls"
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.NetClsClassid != 0 {
|
||||
if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
Normal file
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetPrioGroup struct{}
|
||||
|
||||
func (s *NetPrioGroup) Name() string {
|
||||
return "net_prio"
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
|
||||
for _, prioMap := range r.NetPrioIfpriomap {
|
||||
if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
186
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
Normal file
186
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
Normal file
@ -0,0 +1,186 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
var (
	// cgroupRootLock serializes access to cgroupRoot.
	cgroupRootLock sync.Mutex
	// cgroupRoot is the absolute path to the root of the cgroup
	// hierarchies. It is empty until rootPath has discovered it.
	cgroupRoot string
)

// defaultCgroupRoot is the location tried first when looking for the root of
// the cgroup hierarchies.
const defaultCgroupRoot = "/sys/fs/cgroup"
|
||||
|
||||
func initPaths(cg *configs.Cgroup) (map[string]string, error) {
|
||||
root, err := rootPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
inner, err := innerPath(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
path, err := subsysPath(root, inner, name)
|
||||
if err != nil {
|
||||
// The non-presence of the devices subsystem
|
||||
// is considered fatal for security reasons.
|
||||
if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
|
||||
continue
|
||||
}
|
||||
|
||||
return nil, err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func tryDefaultCgroupRoot() string {
|
||||
var st, pst unix.Stat_t
|
||||
|
||||
// (1) it should be a directory...
|
||||
err := unix.Lstat(defaultCgroupRoot, &st)
|
||||
if err != nil || st.Mode&unix.S_IFDIR == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (2) ... and a mount point ...
|
||||
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if st.Dev == pst.Dev {
|
||||
// parent dir has the same dev -- not a mount point
|
||||
return ""
|
||||
}
|
||||
|
||||
// (3) ... of 'tmpfs' fs type.
|
||||
var fst unix.Statfs_t
|
||||
err = unix.Statfs(defaultCgroupRoot, &fst)
|
||||
if err != nil || fst.Type != unix.TMPFS_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (4) it should have at least 1 entry ...
|
||||
dir, err := os.Open(defaultCgroupRoot)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
defer dir.Close()
|
||||
names, err := dir.Readdirnames(1)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if len(names) < 1 {
|
||||
return ""
|
||||
}
|
||||
// ... which is a cgroup mount point.
|
||||
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
|
||||
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
return defaultCgroupRoot
|
||||
}
|
||||
|
||||
// rootPath finds and returns path to the root of the cgroup hierarchies.
|
||||
func rootPath() (string, error) {
|
||||
cgroupRootLock.Lock()
|
||||
defer cgroupRootLock.Unlock()
|
||||
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// fast path
|
||||
cgroupRoot = tryDefaultCgroupRoot()
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// slow path: parse mountinfo
|
||||
mi, err := cgroups.GetCgroupMounts(false)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(mi) < 1 {
|
||||
return "", errors.New("no cgroup mount found in mountinfo")
|
||||
}
|
||||
|
||||
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
|
||||
// use its parent directory.
|
||||
root := filepath.Dir(mi[0].Mountpoint)
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cgroupRoot = root
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
func innerPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||
innerPath := utils.CleanPath(c.Path)
|
||||
if innerPath == "" {
|
||||
cgParent := utils.CleanPath(c.Parent)
|
||||
cgName := utils.CleanPath(c.Name)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return innerPath, nil
|
||||
}
|
||||
|
||||
func subsysPath(root, inner, subsystem string) (string, error) {
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(inner) {
|
||||
mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
return filepath.Join(root, filepath.Base(mnt), inner), nil
|
||||
}
|
||||
|
||||
// Use GetOwnCgroupPath for dind-like cases, when cgroupns is not
|
||||
// available. This is ugly.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, inner), nil
|
||||
}
|
||||
|
||||
func apply(path string, pid int) error {
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
24
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
Normal file
24
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PerfEventGroup struct{}
|
||||
|
||||
func (s *PerfEventGroup) Name() string {
|
||||
return "perf_event"
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
Normal file
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
Normal file
@ -0,0 +1,62 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PidsGroup struct{}
|
||||
|
||||
func (s *PidsGroup) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if r.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(r.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
max, err := fscommon.GetCgroupParamUint(path, "pids.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If no limit is set, read from pids.max returns "max", which is
|
||||
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||
// represent "no limit" for pids as 0, thus this conversion.
|
||||
if max == math.MaxUint64 {
|
||||
max = 0
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
Normal file
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type RdmaGroup struct{}
|
||||
|
||||
func (s *RdmaGroup) Name() string {
|
||||
return "rdma"
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
|
||||
return fscommon.RdmaSet(path, r)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return fscommon.RdmaGetStats(path, stats)
|
||||
}
|
118
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
118
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpuSet(r *configs.Resources) bool {
|
||||
return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil || r.CpuBurst != nil
|
||||
}
|
||||
|
||||
func setCpu(dirPath string, r *configs.Resources) error {
|
||||
if !isCpuSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.CPUIdle != nil {
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.idle", strconv.FormatInt(*r.CPUIdle, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
|
||||
if r.CpuWeight != 0 {
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
var burst string
|
||||
if r.CpuBurst != nil {
|
||||
burst = strconv.FormatUint(*r.CpuBurst, 10)
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil {
|
||||
// Sometimes when the burst to be set is larger
|
||||
// than the current one, it is rejected by the kernel
|
||||
// (EINVAL) as old_quota/new_burst exceeds the parent
|
||||
// cgroup quota limit. If this happens and the quota is
|
||||
// going to be set, ignore the error for now and retry
|
||||
// after setting the quota.
|
||||
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
burst = ""
|
||||
}
|
||||
}
|
||||
if r.CpuQuota != 0 || r.CpuPeriod != 0 {
|
||||
str := "max"
|
||||
if r.CpuQuota > 0 {
|
||||
str = strconv.FormatInt(r.CpuQuota, 10)
|
||||
}
|
||||
period := r.CpuPeriod
|
||||
if period == 0 {
|
||||
// This default value is documented in
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
||||
period = 100000
|
||||
}
|
||||
str += " " + strconv.FormatUint(period, 10)
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil {
|
||||
return err
|
||||
}
|
||||
if burst != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
const file = "cpu.stat"
|
||||
f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
switch t {
|
||||
case "usage_usec":
|
||||
stats.CpuStats.CpuUsage.TotalUsage = v * 1000
|
||||
|
||||
case "user_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000
|
||||
|
||||
case "system_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000
|
||||
|
||||
case "nr_periods":
|
||||
stats.CpuStats.ThrottlingData.Periods = v
|
||||
|
||||
case "nr_throttled":
|
||||
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||
|
||||
case "throttled_usec":
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
return nil
|
||||
}
|
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpusetSet(r *configs.Resources) bool {
|
||||
return r.CpusetCpus != "" || r.CpusetMems != ""
|
||||
}
|
||||
|
||||
func setCpuset(dirPath string, r *configs.Resources) error {
|
||||
if !isCpusetSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.CpusetCpus != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.CpusetMems != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
152
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
Normal file
152
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
Normal file
@ -0,0 +1,152 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func supportedControllers() (string, error) {
|
||||
return cgroups.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
|
||||
}
|
||||
|
||||
// needAnyControllers returns whether we enable some supported controllers or not,
|
||||
// based on (1) controllers available and (2) resources that are being set.
|
||||
// We don't check "pseudo" controllers such as
|
||||
// "freezer" and "devices".
|
||||
func needAnyControllers(r *configs.Resources) (bool, error) {
|
||||
if r == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// list of all available controllers
|
||||
content, err := supportedControllers()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
avail := make(map[string]struct{})
|
||||
for _, ctr := range strings.Fields(content) {
|
||||
avail[ctr] = struct{}{}
|
||||
}
|
||||
|
||||
// check whether the controller if available or not
|
||||
have := func(controller string) bool {
|
||||
_, ok := avail[controller]
|
||||
return ok
|
||||
}
|
||||
|
||||
if isPidsSet(r) && have("pids") {
|
||||
return true, nil
|
||||
}
|
||||
if isMemorySet(r) && have("memory") {
|
||||
return true, nil
|
||||
}
|
||||
if isIoSet(r) && have("io") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpuSet(r) && have("cpu") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpusetSet(r) && have("cpuset") {
|
||||
return true, nil
|
||||
}
|
||||
if isHugeTlbSet(r) && have("hugetlb") {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// containsDomainController returns whether the current config contains domain controller or not.
|
||||
// Refer to: http://man7.org/linux/man-pages/man7/cgroups.7.html
|
||||
// As at Linux 4.19, the following controllers are threaded: cpu, perf_event, and pids.
|
||||
func containsDomainController(r *configs.Resources) bool {
|
||||
return isMemorySet(r) || isIoSet(r) || isCpuSet(r) || isHugeTlbSet(r)
|
||||
}
|
||||
|
||||
// CreateCgroupPath creates cgroupv2 path, enabling all the supported controllers.
|
||||
func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
|
||||
if !strings.HasPrefix(path, UnifiedMountpoint) {
|
||||
return fmt.Errorf("invalid cgroup path %s", path)
|
||||
}
|
||||
|
||||
content, err := supportedControllers()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
const (
|
||||
cgTypeFile = "cgroup.type"
|
||||
cgStCtlFile = "cgroup.subtree_control"
|
||||
)
|
||||
ctrs := strings.Fields(content)
|
||||
res := "+" + strings.Join(ctrs, " +")
|
||||
|
||||
elements := strings.Split(path, "/")
|
||||
elements = elements[3:]
|
||||
current := "/sys/fs"
|
||||
for i, e := range elements {
|
||||
current = filepath.Join(current, e)
|
||||
if i > 0 {
|
||||
if err := os.Mkdir(current, 0o755); err != nil {
|
||||
if !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// If the directory was created, be sure it is not left around on errors.
|
||||
current := current
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
os.Remove(current)
|
||||
}
|
||||
}()
|
||||
}
|
||||
cgType, _ := cgroups.ReadFile(current, cgTypeFile)
|
||||
cgType = strings.TrimSpace(cgType)
|
||||
switch cgType {
|
||||
// If the cgroup is in an invalid mode (usually this means there's an internal
|
||||
// process in the cgroup tree, because we created a cgroup under an
|
||||
// already-populated-by-other-processes cgroup), then we have to error out if
|
||||
// the user requested controllers which are not thread-aware. However, if all
|
||||
// the controllers requested are thread-aware we can simply put the cgroup into
|
||||
// threaded mode.
|
||||
case "domain invalid":
|
||||
if containsDomainController(c.Resources) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in an invalid state", current)
|
||||
} else {
|
||||
// Not entirely correct (in theory we'd always want to be a domain --
|
||||
// since that means we're a properly delegated cgroup subtree) but in
|
||||
// this case there's not much we can do and it's better than giving an
|
||||
// error.
|
||||
_ = cgroups.WriteFile(current, cgTypeFile, "threaded")
|
||||
}
|
||||
// If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers
|
||||
// (and you cannot usually take a cgroup out of threaded mode).
|
||||
case "domain threaded":
|
||||
fallthrough
|
||||
case "threaded":
|
||||
if containsDomainController(c.Resources) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in %s mode", current, cgType)
|
||||
}
|
||||
}
|
||||
}
|
||||
// enable all supported controllers
|
||||
if i < len(elements)-1 {
|
||||
if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil {
|
||||
// try write one by one
|
||||
allCtrs := strings.Split(res, " ")
|
||||
for _, ctr := range allCtrs {
|
||||
_ = cgroups.WriteFile(current, cgStCtlFile, ctr)
|
||||
}
|
||||
}
|
||||
// Some controllers might not be enabled when rootless or containerized,
|
||||
// but we don't catch the error here. (Caught in setXXX() functions.)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
102
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
102
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
@ -0,0 +1,102 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// UnifiedMountpoint is the directory under which this package expects the
// cgroup hierarchy; cgroup paths are built and validated relative to it.
const UnifiedMountpoint = "/sys/fs/cgroup"
|
||||
|
||||
func defaultDirPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||
}
|
||||
|
||||
return _defaultDirPath(UnifiedMountpoint, c.Path, c.Parent, c.Name)
|
||||
}
|
||||
|
||||
func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) {
|
||||
if (cgName != "" || cgParent != "") && cgPath != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||
innerPath := utils.CleanPath(cgPath)
|
||||
if innerPath == "" {
|
||||
cgParent := utils.CleanPath(cgParent)
|
||||
cgName := utils.CleanPath(cgName)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
if filepath.IsAbs(innerPath) {
|
||||
return filepath.Join(root, innerPath), nil
|
||||
}
|
||||
|
||||
// we don't need to use /proc/thread-self here because runc always runs
|
||||
// with every thread in the same cgroup. This lets us avoid having to do
|
||||
// runtime.LockOSThread.
|
||||
ownCgroup, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// The current user scope most probably has tasks in it already,
|
||||
// making it impossible to enable controllers for its sub-cgroup.
|
||||
// A parent cgroup (with no tasks in it) is what we need.
|
||||
ownCgroup = filepath.Dir(ownCgroup)
|
||||
|
||||
return filepath.Join(root, ownCgroup, innerPath), nil
|
||||
}
|
||||
|
||||
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
||||
func parseCgroupFile(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
// parseCgroupFromReader scans /proc/PID/cgroup formatted content and
// returns the path of the first entry whose hierarchy ID is "0" and whose
// controller list is empty, e.g. the "/user.slice/..." part of
// "0::/user.slice/user-1001.slice/session-1.scope".
//
// A line with fewer than three colon-separated fields is reported as an
// error. If no matching entry exists, "cgroup path not found" is returned.
func parseCgroupFromReader(r io.Reader) (string, error) {
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := scanner.Text()
		fields := strings.SplitN(line, ":", 3)
		if len(fields) < 3 {
			return "", fmt.Errorf("invalid cgroup entry: %q", line)
		}
		// Skip everything that is not of the "0::<path>" form.
		if fields[0] != "0" || fields[1] != "" {
			continue
		}
		return fields[2], nil
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return "", scanErr
	}
	return "", errors.New("cgroup path not found")
}
|
127
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
127
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
@ -0,0 +1,127 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
var stateStr string
|
||||
switch state {
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
case configs.Frozen:
|
||||
stateStr = "1"
|
||||
case configs.Thawed:
|
||||
stateStr = "0"
|
||||
default:
|
||||
return fmt.Errorf("invalid freezer state %q requested", state)
|
||||
}
|
||||
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
|
||||
if err != nil {
|
||||
// We can ignore this request as long as the user didn't ask us to
|
||||
// freeze the container (since without the freezer cgroup, that's a
|
||||
// no-op).
|
||||
if state != configs.Frozen {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("freezer not supported: %w", err)
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
if _, err := fd.WriteString(stateStr); err != nil {
|
||||
return err
|
||||
}
|
||||
// Confirm that the cgroup did actually change states.
|
||||
if actualState, err := readFreezer(dirPath, fd); err != nil {
|
||||
return err
|
||||
} else if actualState != state {
|
||||
return fmt.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getFreezer(dirPath string) (configs.FreezerState, error) {
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY)
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as being in
|
||||
// an "undefined" state.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
return readFreezer(dirPath, fd)
|
||||
}
|
||||
|
||||
func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
|
||||
if _, err := fd.Seek(0, 0); err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
state := make([]byte, 2)
|
||||
if _, err := fd.Read(state); err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
switch string(state) {
|
||||
case "0\n":
|
||||
return configs.Thawed, nil
|
||||
case "1\n":
|
||||
return waitFrozen(dirPath)
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, state)
|
||||
}
|
||||
}
|
||||
|
||||
// waitFrozen polls cgroup.events until it sees "frozen 1" in it.
|
||||
func waitFrozen(dirPath string) (configs.FreezerState, error) {
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
// XXX: Simple wait/read/retry is used here. An implementation
|
||||
// based on poll(2) or inotify(7) is possible, but it makes the code
|
||||
// much more complicated. Maybe address this later.
|
||||
const (
|
||||
// Perform maxIter with waitTime in between iterations.
|
||||
waitTime = 10 * time.Millisecond
|
||||
maxIter = 1000
|
||||
)
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for i := 0; scanner.Scan(); {
|
||||
if i == maxIter {
|
||||
return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter)
|
||||
}
|
||||
line := scanner.Text()
|
||||
val := strings.TrimPrefix(line, "frozen ")
|
||||
if val != line { // got prefix
|
||||
if val[0] == '1' {
|
||||
return configs.Frozen, nil
|
||||
}
|
||||
|
||||
i++
|
||||
// wait, then re-read
|
||||
time.Sleep(waitTime)
|
||||
_, err := fd.Seek(0, 0)
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
}
|
||||
}
|
||||
// Should only reach here either on read error,
|
||||
// or if the file does not contain "frozen " line.
|
||||
return configs.Undefined, scanner.Err()
|
||||
}
|
318
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
318
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
@ -0,0 +1,318 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type parseError = fscommon.ParseError
|
||||
|
||||
type Manager struct {
|
||||
config *configs.Cgroup
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
dirPath string
|
||||
// controllers is content of "cgroup.controllers" file.
|
||||
// excludes pseudo-controllers ("devices" and "freezer").
|
||||
controllers map[string]struct{}
|
||||
}
|
||||
|
||||
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||
// If dirPath is empty, it is automatically set using config.
|
||||
func NewManager(config *configs.Cgroup, dirPath string) (*Manager, error) {
|
||||
if dirPath == "" {
|
||||
var err error
|
||||
dirPath, err = defaultDirPath(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
m := &Manager{
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m *Manager) getControllers() error {
|
||||
if m.controllers != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
|
||||
if err != nil {
|
||||
if m.config.Rootless && m.config.Path == "" {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
fields := strings.Fields(data)
|
||||
m.controllers = make(map[string]struct{}, len(fields))
|
||||
for _, c := range fields {
|
||||
m.controllers[c] = struct{}{}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
if err := CreateCgroupPath(m.dirPath, m.config); err != nil {
|
||||
// Related tests:
|
||||
// - "runc create (no limits + no cgrouppath + no permission) succeeds"
|
||||
// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
|
||||
// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if m.config.Rootless {
|
||||
if m.config.Path == "" {
|
||||
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
|
||||
return cgroups.ErrRootless
|
||||
}
|
||||
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
var errs []error
|
||||
|
||||
st := cgroups.NewStats()
|
||||
|
||||
// pids (since kernel 4.5)
|
||||
if err := statPids(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
// Note cpu.stat is available even if the controller is not enabled.
|
||||
if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// PSI (since kernel 4.20).
|
||||
var err error
|
||||
if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// rdma (since kernel 4.11)
|
||||
if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// misc (since kernel 5.13)
|
||||
if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.config.Rootless {
|
||||
return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
}
|
||||
return st, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
if m.config.Resources == nil {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
if err := setFreezer(m.dirPath, state); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config.Resources.Freezer = state
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
return cgroups.RemovePath(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *Manager) Path(_ string) string {
|
||||
return m.dirPath
|
||||
}
|
||||
|
||||
func (m *Manager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
if err := m.getControllers(); err != nil {
|
||||
return err
|
||||
}
|
||||
// pids (since kernel 4.5)
|
||||
if err := setPids(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if err := setMemory(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if err := setIo(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if err := setCpu(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// devices (since kernel 4.15, pseudo-controller)
|
||||
//
|
||||
// When rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if err := setDevices(m.dirPath, r); err != nil {
|
||||
if !m.config.Rootless || errors.Is(err, cgroups.ErrDevicesUnsupported) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// cpuset (since kernel 5.0)
|
||||
if err := setCpuset(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if err := setHugeTlb(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// rdma (since kernel 4.11)
|
||||
if err := fscommon.RdmaSet(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// freezer (since kernel 5.2, pseudo-controller)
|
||||
if err := setFreezer(m.dirPath, r.Freezer); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := m.setUnified(r.Unified); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config.Resources = r
|
||||
return nil
|
||||
}
|
||||
|
||||
func setDevices(dirPath string, r *configs.Resources) error {
|
||||
if cgroups.DevicesSetV2 == nil {
|
||||
if len(r.Devices) > 0 {
|
||||
return cgroups.ErrDevicesUnsupported
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return cgroups.DevicesSetV2(dirPath, r)
|
||||
}
|
||||
|
||||
func (m *Manager) setUnified(res map[string]string) error {
|
||||
for k, v := range res {
|
||||
if strings.Contains(k, "/") {
|
||||
return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
|
||||
}
|
||||
if err := cgroups.WriteFileByLine(m.dirPath, k, v); err != nil {
|
||||
// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
|
||||
if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
|
||||
// Check if a controller is available,
|
||||
// to give more specific error if not.
|
||||
sk := strings.SplitN(k, ".", 2)
|
||||
if len(sk) != 2 {
|
||||
return fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k)
|
||||
}
|
||||
c := sk[0]
|
||||
if _, ok := m.controllers[c]; !ok && c != "cgroup" {
|
||||
return fmt.Errorf("unified resource %q can't be set: controller %q not available", k, c)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("unable to set unified resource %q: %w", k, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
paths := make(map[string]string, 1)
|
||||
paths[""] = m.dirPath
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.config, nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
return getFreezer(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *Manager) Exists() bool {
|
||||
return cgroups.PathExists(m.dirPath)
|
||||
}
|
||||
|
||||
func OOMKillCount(path string) (uint64, error) {
|
||||
return fscommon.GetValueByKey(path, "memory.events", "oom_kill")
|
||||
}
|
||||
|
||||
func (m *Manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.dirPath)
|
||||
if err != nil && m.config.Rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return c, err
|
||||
}
|
||||
|
||||
func CheckMemoryUsage(dirPath string, r *configs.Resources) error {
|
||||
if !r.MemoryCheckBeforeUpdate {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.Memory <= 0 && r.MemorySwap <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
usage, err := fscommon.GetCgroupParamUint(dirPath, "memory.current")
|
||||
if err != nil {
|
||||
// This check is on best-effort basis, so if we can't read the
|
||||
// current usage (cgroup not yet created, or any other error),
|
||||
// we should not fail.
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.MemorySwap > 0 {
|
||||
if uint64(r.MemorySwap) <= usage {
|
||||
return fmt.Errorf("rejecting memory+swap limit %d <= usage %d", r.MemorySwap, usage)
|
||||
}
|
||||
}
|
||||
|
||||
if r.Memory > 0 {
|
||||
if uint64(r.Memory) <= usage {
|
||||
return fmt.Errorf("rejecting memory limit %d <= usage %d", r.Memory, usage)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
70
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
Normal file
70
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
Normal file
@ -0,0 +1,70 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isHugeTlbSet(r *configs.Resources) bool {
|
||||
return len(r.HugetlbLimit) > 0
|
||||
}
|
||||
|
||||
func setHugeTlb(dirPath string, r *configs.Resources) error {
|
||||
if !isHugeTlbSet(r) {
|
||||
return nil
|
||||
}
|
||||
const suffix = ".max"
|
||||
skipRsvd := false
|
||||
for _, hugetlb := range r.HugetlbLimit {
|
||||
prefix := "hugetlb." + hugetlb.Pagesize
|
||||
val := strconv.FormatUint(hugetlb.Limit, 10)
|
||||
if err := cgroups.WriteFile(dirPath, prefix+suffix, val); err != nil {
|
||||
return err
|
||||
}
|
||||
if skipRsvd {
|
||||
continue
|
||||
}
|
||||
if err := cgroups.WriteFile(dirPath, prefix+".rsvd"+suffix, val); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
skipRsvd = true
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
rsvd := ".rsvd"
|
||||
for _, pagesize := range cgroups.HugePageSizes() {
|
||||
again:
|
||||
prefix := "hugetlb." + pagesize + rsvd
|
||||
value, err := fscommon.GetCgroupParamUint(dirPath, prefix+".current")
|
||||
if err != nil {
|
||||
if rsvd != "" && errors.Is(err, os.ErrNotExist) {
|
||||
rsvd = ""
|
||||
goto again
|
||||
}
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
value, err = fscommon.GetValueByKey(dirPath, prefix+".events", "max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pagesize] = hugetlbStats
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
193
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
193
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
@ -0,0 +1,193 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isIoSet(r *configs.Resources) bool {
|
||||
return r.BlkioWeight != 0 ||
|
||||
len(r.BlkioWeightDevice) > 0 ||
|
||||
len(r.BlkioThrottleReadBpsDevice) > 0 ||
|
||||
len(r.BlkioThrottleWriteBpsDevice) > 0 ||
|
||||
len(r.BlkioThrottleReadIOPSDevice) > 0 ||
|
||||
len(r.BlkioThrottleWriteIOPSDevice) > 0
|
||||
}
|
||||
|
||||
// bfqDeviceWeightSupported checks for per-device BFQ weight support (added
// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight".
//
// bfq is the opened "io.bfq.weight" file, or nil when it is unavailable,
// in which case false is returned. The file is rewound and up to 32 bytes
// are read from it: if what was read is just a single number (the default
// weight), the kernel is considered too old and false is returned;
// anything else yields true.
func bfqDeviceWeightSupported(bfq *os.File) bool {
	if bfq == nil {
		return false
	}
	// Errors from Seek and Read are deliberately ignored: whatever could
	// be read (possibly nothing) is what gets examined below.
	_, _ = bfq.Seek(0, 0)
	buf := make([]byte, 32)
	n, _ := bfq.Read(buf)
	// Only look at the bytes actually read. The unused tail of buf is
	// zero-filled, and bytes.TrimSpace does not strip NUL bytes, so parsing
	// the whole buffer would fail even for a plain number.
	content := bytes.TrimSpace(buf[:n])
	// If only a single number (default weight) is read back, we have older kernel.
	_, err := strconv.ParseInt(string(content), 10, 64)
	return err != nil
}
|
||||
|
||||
func setIo(dirPath string, r *configs.Resources) error {
|
||||
if !isIoSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If BFQ IO scheduler is available, use it.
|
||||
var bfq *os.File
|
||||
if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 {
|
||||
var err error
|
||||
bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR)
|
||||
if err == nil {
|
||||
defer bfq.Close()
|
||||
} else if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.BlkioWeight != 0 {
|
||||
if bfq != nil { // Use BFQ.
|
||||
if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// Fallback to io.weight with a conversion scheme.
|
||||
v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight)
|
||||
if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if bfqDeviceWeightSupported(bfq) {
|
||||
for _, wd := range r.BlkioWeightDevice {
|
||||
if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil {
|
||||
return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
|
||||
ret := map[string][]string{}
|
||||
f, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
ret[parts[0]] = parts[1:]
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, &parseError{Path: dirPath, File: name, Err: err}
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
const file = "io.stat"
|
||||
values, err := readCgroup2MapFile(dirPath, file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
var parsedStats cgroups.BlkioStats
|
||||
for k, v := range values {
|
||||
d := strings.Split(k, ":")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
major, err := strconv.ParseUint(d[0], 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
minor, err := strconv.ParseUint(d[1], 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
|
||||
for _, item := range v {
|
||||
d := strings.Split(item, "=")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
op := d[0]
|
||||
|
||||
// Map to the cgroupv1 naming and layout (in separate tables).
|
||||
var targetTable *[]cgroups.BlkioStatEntry
|
||||
switch op {
|
||||
// Equivalent to cgroupv1's blkio.io_service_bytes.
|
||||
case "rbytes":
|
||||
op = "Read"
|
||||
targetTable = &parsedStats.IoServiceBytesRecursive
|
||||
case "wbytes":
|
||||
op = "Write"
|
||||
targetTable = &parsedStats.IoServiceBytesRecursive
|
||||
// Equivalent to cgroupv1's blkio.io_serviced.
|
||||
case "rios":
|
||||
op = "Read"
|
||||
targetTable = &parsedStats.IoServicedRecursive
|
||||
case "wios":
|
||||
op = "Write"
|
||||
targetTable = &parsedStats.IoServicedRecursive
|
||||
default:
|
||||
// Skip over entries we cannot map to cgroupv1 stats for now.
|
||||
// In the future we should expand the stats struct to include
|
||||
// them.
|
||||
logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item)
|
||||
continue
|
||||
}
|
||||
|
||||
value, err := strconv.ParseUint(d[1], 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
|
||||
entry := cgroups.BlkioStatEntry{
|
||||
Op: op,
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
Value: value,
|
||||
}
|
||||
*targetTable = append(*targetTable, entry)
|
||||
}
|
||||
}
|
||||
stats.BlkioStats = parsedStats
|
||||
return nil
|
||||
}
|
242
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
242
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
@ -0,0 +1,242 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// numToStr renders an int64 for writing to a cgroup v2 file with a
// .min, .max, .low, or .high suffix.
//
// 0 yields the empty string and -1 yields "max" (for cgroup v1
// compatibility, where -1 was written to remove the limit). Every other
// value is formatted in base 10.
func numToStr(value int64) (ret string) {
	if value == 0 {
		return ""
	}
	if value == -1 {
		return "max"
	}
	return strconv.FormatInt(value, 10)
}
|
||||
|
||||
func isMemorySet(r *configs.Resources) bool {
|
||||
return r.MemoryReservation != 0 || r.Memory != 0 || r.MemorySwap != 0
|
||||
}
|
||||
|
||||
func setMemory(dirPath string, r *configs.Resources) error {
|
||||
if !isMemorySet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := CheckMemoryUsage(dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
swapStr := numToStr(swap)
|
||||
if swapStr == "" && swap == 0 && r.MemorySwap > 0 {
|
||||
// memory and memorySwap set to the same value -- disable swap
|
||||
swapStr = "0"
|
||||
}
|
||||
// never write empty string to `memory.swap.max`, it means set to 0.
|
||||
if swapStr != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
|
||||
// If swap is not enabled, silently ignore setting to max or disabling it.
|
||||
if !(errors.Is(err, os.ErrNotExist) && (swapStr == "max" || swapStr == "0")) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if val := numToStr(r.Memory); val != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "memory.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// cgroup.Resources.KernelMemory is ignored
|
||||
|
||||
if val := numToStr(r.MemoryReservation); val != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "memory.low", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
const file = "memory.stat"
|
||||
statsFile, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"]
|
||||
// Unlike cgroup v1 which has memory.use_hierarchy binary knob,
|
||||
// cgroup v2 is always hierarchical.
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
|
||||
memoryUsage, err := getMemoryDataV2(dirPath, "")
|
||||
if err != nil {
|
||||
if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint {
|
||||
// The root cgroup does not have memory.{current,max,peak}
|
||||
// so emulate those using data from /proc/meminfo and
|
||||
// /sys/fs/cgroup/memory.stat
|
||||
return rootStatsFromMeminfo(stats)
|
||||
}
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapOnlyUsage, err := getMemoryDataV2(dirPath, "swap")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapOnlyUsage = swapOnlyUsage
|
||||
swapUsage := swapOnlyUsage
|
||||
// As cgroup v1 reports SwapUsage values as mem+swap combined,
|
||||
// while in cgroup v2 swap values do not include memory,
|
||||
// report combined mem+swap for v1 compatibility.
|
||||
swapUsage.Usage += memoryUsage.Usage
|
||||
if swapUsage.Limit != math.MaxUint64 {
|
||||
swapUsage.Limit += memoryUsage.Limit
|
||||
}
|
||||
// The `MaxUsage` of mem+swap cannot simply combine mem with
|
||||
// swap. So set it to 0 for v1 compatibility.
|
||||
swapUsage.MaxUsage = 0
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = "memory." + name
|
||||
}
|
||||
usage := moduleName + ".current"
|
||||
limit := moduleName + ".max"
|
||||
maxUsage := moduleName + ".peak"
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if name != "" && os.IsNotExist(err) {
|
||||
// Ignore EEXIST as there's no swap accounting
|
||||
// if kernel CONFIG_MEMCG_SWAP is not set or
|
||||
// swapaccount=0 kernel boot parameter is given.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Usage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
// `memory.peak` since kernel 5.19
|
||||
// `memory.swap.peak` since kernel 6.5
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func rootStatsFromMeminfo(stats *cgroups.Stats) error {
|
||||
const file = "/proc/meminfo"
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// Fields we are interested in.
|
||||
var (
|
||||
swap_free uint64
|
||||
swap_total uint64
|
||||
)
|
||||
mem := map[string]*uint64{
|
||||
"SwapFree": &swap_free,
|
||||
"SwapTotal": &swap_total,
|
||||
}
|
||||
|
||||
found := 0
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
parts := strings.SplitN(sc.Text(), ":", 3)
|
||||
if len(parts) != 2 {
|
||||
// Should not happen.
|
||||
continue
|
||||
}
|
||||
k := parts[0]
|
||||
p, ok := mem[k]
|
||||
if !ok {
|
||||
// Unknown field -- not interested.
|
||||
continue
|
||||
}
|
||||
vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB"))
|
||||
*p, err = strconv.ParseUint(vStr, 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{File: file, Err: errors.New("bad value for " + k)}
|
||||
}
|
||||
|
||||
found++
|
||||
if found == len(mem) {
|
||||
// Got everything we need -- skip the rest.
|
||||
break
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: "", File: file, Err: err}
|
||||
}
|
||||
|
||||
// cgroup v1 `usage_in_bytes` reports memory usage as the sum of
|
||||
// - rss (NR_ANON_MAPPED)
|
||||
// - cache (NR_FILE_PAGES)
|
||||
// cgroup v1 reports SwapUsage values as mem+swap combined
|
||||
// cgroup v2 reports rss and cache as anon and file.
|
||||
// sum `anon` + `file` to report the same value as `usage_in_bytes` in v1.
|
||||
// sum swap usage as combined mem+swap usage for consistency as well.
|
||||
stats.MemoryStats.Usage.Usage = stats.MemoryStats.Stats["anon"] + stats.MemoryStats.Stats["file"]
|
||||
stats.MemoryStats.Usage.Limit = math.MaxUint64
|
||||
stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024
|
||||
stats.MemoryStats.SwapUsage.Limit = math.MaxUint64
|
||||
stats.MemoryStats.SwapUsage.Usage += stats.MemoryStats.Usage.Usage
|
||||
|
||||
return nil
|
||||
}
|
52
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go
generated
vendored
Normal file
52
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go
generated
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
func statMisc(dirPath string, stats *cgroups.Stats) error {
|
||||
for _, file := range []string{"current", "events"} {
|
||||
fd, err := cgroups.OpenFile(dirPath, "misc."+file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s := bufio.NewScanner(fd)
|
||||
for s.Scan() {
|
||||
key, value, err := fscommon.ParseKeyValue(s.Text())
|
||||
if err != nil {
|
||||
fd.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
key = strings.TrimSuffix(key, ".max")
|
||||
|
||||
if _, ok := stats.MiscStats[key]; !ok {
|
||||
stats.MiscStats[key] = cgroups.MiscStats{}
|
||||
}
|
||||
|
||||
tmp := stats.MiscStats[key]
|
||||
|
||||
switch file {
|
||||
case "current":
|
||||
tmp.Usage = value
|
||||
case "events":
|
||||
tmp.Events = value
|
||||
}
|
||||
|
||||
stats.MiscStats[key] = tmp
|
||||
}
|
||||
fd.Close()
|
||||
|
||||
if err := s.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
72
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
72
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
@ -0,0 +1,72 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isPidsSet(r *configs.Resources) bool {
|
||||
return r.PidsLimit != 0
|
||||
}
|
||||
|
||||
func setPids(dirPath string, r *configs.Resources) error {
|
||||
if !isPidsSet(r) {
|
||||
return nil
|
||||
}
|
||||
if val := numToStr(r.PidsLimit); val != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error {
|
||||
// if the controller is not enabled, let's read PIDS from cgroups.procs
|
||||
// (or threads if cgroup.threads is enabled)
|
||||
contents, err := cgroups.ReadFile(dirPath, "cgroup.procs")
|
||||
if errors.Is(err, unix.ENOTSUP) {
|
||||
contents, err = cgroups.ReadFile(dirPath, "cgroup.threads")
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids := strings.Count(contents, "\n")
|
||||
stats.PidsStats.Current = uint64(pids)
|
||||
stats.PidsStats.Limit = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPids(dirPath string, stats *cgroups.Stats) error {
|
||||
current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current")
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return statPidsFromCgroupProcs(dirPath, stats)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
max, err := fscommon.GetCgroupParamUint(dirPath, "pids.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If no limit is set, read from pids.max returns "max", which is
|
||||
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||
// represent "no limit" for pids as 0, thus this conversion.
|
||||
if max == math.MaxUint64 {
|
||||
max = 0
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
89
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go
generated
vendored
Normal file
89
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go
generated
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
func statPSI(dirPath string, file string) (*cgroups.PSIStats, error) {
|
||||
f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
// Kernel < 4.20, or CONFIG_PSI is not set,
|
||||
// or PSI stats are turned off for the cgroup
|
||||
// ("echo 0 > cgroup.pressure", kernel >= 6.1).
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var psistats cgroups.PSIStats
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
parts := strings.Fields(sc.Text())
|
||||
var pv *cgroups.PSIData
|
||||
switch parts[0] {
|
||||
case "some":
|
||||
pv = &psistats.Some
|
||||
case "full":
|
||||
pv = &psistats.Full
|
||||
}
|
||||
if pv != nil {
|
||||
*pv, err = parsePSIData(parts[1:])
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
if errors.Is(err, unix.ENOTSUP) {
|
||||
// Some kernels (e.g. CS9) may return ENOTSUP on read
|
||||
// if psi=1 kernel cmdline parameter is required.
|
||||
return nil, nil
|
||||
}
|
||||
return nil, &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
return &psistats, nil
|
||||
}
|
||||
|
||||
func parsePSIData(psi []string) (cgroups.PSIData, error) {
|
||||
data := cgroups.PSIData{}
|
||||
for _, f := range psi {
|
||||
kv := strings.SplitN(f, "=", 2)
|
||||
if len(kv) != 2 {
|
||||
return data, fmt.Errorf("invalid psi data: %q", f)
|
||||
}
|
||||
var pv *float64
|
||||
switch kv[0] {
|
||||
case "avg10":
|
||||
pv = &data.Avg10
|
||||
case "avg60":
|
||||
pv = &data.Avg60
|
||||
case "avg300":
|
||||
pv = &data.Avg300
|
||||
case "total":
|
||||
v, err := strconv.ParseUint(kv[1], 10, 64)
|
||||
if err != nil {
|
||||
return data, fmt.Errorf("invalid %s PSI value: %w", kv[0], err)
|
||||
}
|
||||
data.Total = v
|
||||
}
|
||||
if pv != nil {
|
||||
v, err := strconv.ParseFloat(kv[1], 64)
|
||||
if err != nil {
|
||||
return data, fmt.Errorf("invalid %s PSI value: %w", kv[0], err)
|
||||
}
|
||||
*pv = v
|
||||
}
|
||||
}
|
||||
return data, nil
|
||||
}
|
121
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
Normal file
121
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// parseRdmaKV parses raw string to RdmaEntry.
|
||||
func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
|
||||
var value uint32
|
||||
|
||||
parts := strings.SplitN(raw, "=", 3)
|
||||
|
||||
if len(parts) != 2 {
|
||||
return errors.New("Unable to parse RDMA entry")
|
||||
}
|
||||
|
||||
k, v := parts[0], parts[1]
|
||||
|
||||
if v == "max" {
|
||||
value = math.MaxUint32
|
||||
} else {
|
||||
val64, err := strconv.ParseUint(v, 10, 32)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
value = uint32(val64)
|
||||
}
|
||||
if k == "hca_handle" {
|
||||
entry.HcaHandles = value
|
||||
} else if k == "hca_object" {
|
||||
entry.HcaObjects = value
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readRdmaEntries reads and converts array of rawstrings to RdmaEntries from file.
|
||||
// example entry: mlx4_0 hca_handle=2 hca_object=2000
|
||||
func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
|
||||
rdmaEntries := make([]cgroups.RdmaEntry, 0)
|
||||
fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fd.Close() //nolint:errorlint
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for scanner.Scan() {
|
||||
parts := strings.SplitN(scanner.Text(), " ", 4)
|
||||
if len(parts) == 3 {
|
||||
entry := new(cgroups.RdmaEntry)
|
||||
entry.Device = parts[0]
|
||||
err = parseRdmaKV(parts[1], entry)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
err = parseRdmaKV(parts[2], entry)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
rdmaEntries = append(rdmaEntries, *entry)
|
||||
}
|
||||
}
|
||||
return rdmaEntries, scanner.Err()
|
||||
}
|
||||
|
||||
// RdmaGetStats returns rdma stats such as totalLimit and current entries.
|
||||
func RdmaGetStats(path string, stats *cgroups.Stats) error {
|
||||
currentEntries, err := readRdmaEntries(path, "rdma.current")
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
maxEntries, err := readRdmaEntries(path, "rdma.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If device got removed between reading two files, ignore returning stats.
|
||||
if len(currentEntries) != len(maxEntries) {
|
||||
return nil
|
||||
}
|
||||
|
||||
stats.RdmaStats = cgroups.RdmaStats{
|
||||
RdmaLimit: maxEntries,
|
||||
RdmaCurrent: currentEntries,
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func createCmdString(device string, limits configs.LinuxRdma) string {
|
||||
cmdString := device
|
||||
if limits.HcaHandles != nil {
|
||||
cmdString += " hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10)
|
||||
}
|
||||
if limits.HcaObjects != nil {
|
||||
cmdString += " hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10)
|
||||
}
|
||||
return cmdString
|
||||
}
|
||||
|
||||
// RdmaSet sets RDMA resources.
|
||||
func RdmaSet(path string, r *configs.Resources) error {
|
||||
for device, limits := range r.Rdma {
|
||||
if err := cgroups.WriteFile(path, "rdma.max", createCmdString(device, limits)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
145
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
Normal file
145
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
Normal file
@ -0,0 +1,145 @@
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
var (
|
||||
// Deprecated: use cgroups.OpenFile instead.
|
||||
OpenFile = cgroups.OpenFile
|
||||
// Deprecated: use cgroups.ReadFile instead.
|
||||
ReadFile = cgroups.ReadFile
|
||||
// Deprecated: use cgroups.WriteFile instead.
|
||||
WriteFile = cgroups.WriteFile
|
||||
)
|
||||
|
||||
// ParseError describes a failure to parse a file. Path and File are joined
// to name the offending file, and Err holds the underlying cause.
type ParseError struct {
	Path string
	File string
	Err  error
}

// Error implements the error interface. The message has the form
// "unable to parse <Path>/<File>: <cause>".
func (e *ParseError) Error() string {
	location := path.Join(e.Path, e.File)
	cause := e.Err.Error()
	return "unable to parse " + location + ": " + cause
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// inspect it.
func (e *ParseError) Unwrap() error {
	return e.Err
}
|
||||
|
||||
// ParseUint converts a string to an uint64 integer.
// Negative values are returned as zero because, due to kernel bugs,
// some of the memory cgroup stats can be negative.
func ParseUint(s string, base, bitSize int) (uint64, error) {
	value, err := strconv.ParseUint(s, base, bitSize)
	if err == nil {
		return value, nil
	}

	// Retry as a signed number. A negative result means the input was a
	// negative number, either representable (no error) or below the
	// minimum signed value (range error with a negative clamp).
	signed, signedErr := strconv.ParseInt(s, base, bitSize)
	if signed < 0 && (signedErr == nil || errors.Is(signedErr, strconv.ErrRange)) {
		return 0, nil
	}

	return value, err
}
|
||||
|
||||
// ParseKeyValue parses a space-separated "name value" kind of cgroup
|
||||
// parameter and returns its key as a string, and its value as uint64
|
||||
// (ParseUint is used to convert the value). For example,
|
||||
// "io_service_bytes 1234" will be returned as "io_service_bytes", 1234.
|
||||
func ParseKeyValue(t string) (string, uint64, error) {
|
||||
parts := strings.SplitN(t, " ", 3)
|
||||
if len(parts) != 2 {
|
||||
return "", 0, fmt.Errorf("line %q is not in key value format", t)
|
||||
}
|
||||
|
||||
value, err := ParseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, err
|
||||
}
|
||||
|
||||
return parts[0], value, nil
|
||||
}
|
||||
|
||||
// GetValueByKey reads a key-value pairs from the specified cgroup file,
|
||||
// and returns a value of the specified key. ParseUint is used for value
|
||||
// conversion.
|
||||
func GetValueByKey(path, file, key string) (uint64, error) {
|
||||
content, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
lines := strings.Split(content, "\n")
|
||||
for _, line := range lines {
|
||||
arr := strings.Split(line, " ")
|
||||
if len(arr) == 2 && arr[0] == key {
|
||||
val, err := ParseUint(arr[1], 10, 64)
|
||||
if err != nil {
|
||||
err = &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return val, err
|
||||
}
|
||||
}
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamUint reads a single uint64 value from the specified cgroup file.
|
||||
// If the value read is "max", the math.MaxUint64 is returned.
|
||||
func GetCgroupParamUint(path, file string) (uint64, error) {
|
||||
contents, err := GetCgroupParamString(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
contents = strings.TrimSpace(contents)
|
||||
if contents == "max" {
|
||||
return math.MaxUint64, nil
|
||||
}
|
||||
|
||||
res, err := ParseUint(contents, 10, 64)
|
||||
if err != nil {
|
||||
return res, &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamInt reads a single int64 value from specified cgroup file.
|
||||
// If the value read is "max", the math.MaxInt64 is returned.
|
||||
func GetCgroupParamInt(path, file string) (int64, error) {
|
||||
contents, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
contents = strings.TrimSpace(contents)
|
||||
if contents == "max" {
|
||||
return math.MaxInt64, nil
|
||||
}
|
||||
|
||||
res, err := strconv.ParseInt(contents, 10, 64)
|
||||
if err != nil {
|
||||
return res, &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamString reads a string from the specified cgroup file.
|
||||
func GetCgroupParamString(path, file string) (string, error) {
|
||||
contents, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return strings.TrimSpace(contents), nil
|
||||
}
|
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
Normal file
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// GetAllPids returns all pids from the cgroup identified by path, and all its
|
||||
// sub-cgroups.
|
||||
func GetAllPids(path string) ([]int, error) {
|
||||
var pids []int
|
||||
err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error {
|
||||
if iErr != nil {
|
||||
return iErr
|
||||
}
|
||||
if !d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
cPids, err := readProcsFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids = append(pids, cPids...)
|
||||
return nil
|
||||
})
|
||||
return pids, err
|
||||
}
|
78
vendor/github.com/opencontainers/runc/libcontainer/cgroups/manager/new.go
generated
vendored
Normal file
78
vendor/github.com/opencontainers/runc/libcontainer/cgroups/manager/new.go
generated
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
package manager
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// New returns the instance of a cgroup manager, which is chosen
|
||||
// based on the local environment (whether cgroup v1 or v2 is used)
|
||||
// and the config (whether config.Systemd is set or not).
|
||||
func New(config *configs.Cgroup) (cgroups.Manager, error) {
|
||||
return NewWithPaths(config, nil)
|
||||
}
|
||||
|
||||
// NewWithPaths is similar to New, and can be used in case cgroup paths
|
||||
// are already well known, which can save some resources.
|
||||
//
|
||||
// For cgroup v1, the keys are controller/subsystem name, and the values
|
||||
// are absolute filesystem paths to the appropriate cgroups.
|
||||
//
|
||||
// For cgroup v2, the only key allowed is "" (empty string), and the value
|
||||
// is the unified cgroup path.
|
||||
func NewWithPaths(config *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||
if config == nil {
|
||||
return nil, errors.New("cgroups/manager.New: config must not be nil")
|
||||
}
|
||||
if config.Systemd && !systemd.IsRunningSystemd() {
|
||||
return nil, errors.New("systemd not running on this host, cannot use systemd cgroups manager")
|
||||
}
|
||||
|
||||
// Cgroup v2 aka unified hierarchy.
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
path, err := getUnifiedPath(paths)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("manager.NewWithPaths: inconsistent paths: %w", err)
|
||||
}
|
||||
if config.Systemd {
|
||||
return systemd.NewUnifiedManager(config, path)
|
||||
}
|
||||
return fs2.NewManager(config, path)
|
||||
}
|
||||
|
||||
// Cgroup v1.
|
||||
if config.Systemd {
|
||||
return systemd.NewLegacyManager(config, paths)
|
||||
}
|
||||
|
||||
return fs.NewManager(config, paths)
|
||||
}
|
||||
|
||||
// getUnifiedPath is an implementation detail of libcontainer.
// Historically, libcontainer.Create saves cgroup paths as per-subsystem path
// map (as returned by cm.GetPaths(""), but with v2 we only have one single
// unified path (with "" as a key).
//
// This function extracts the value stored under the "" key and checks that
// the map is sane: it must hold at most one entry, and a non-empty path must
// be absolute and already clean. The returned path may be empty.
func getUnifiedPath(paths map[string]string) (string, error) {
	if len(paths) > 1 {
		return "", fmt.Errorf("expected a single path, got %+v", paths)
	}

	path := paths[""]
	switch {
	case path == "":
		// can be empty
		return path, nil
	case filepath.Clean(path) != path, !filepath.IsAbs(path):
		return "", fmt.Errorf("invalid path: %q", path)
	}
	return path, nil
}
|
200
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
Normal file
200
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
Normal file
@ -0,0 +1,200 @@
|
||||
package cgroups
|
||||
|
||||
// ThrottlingData holds CPU throttling counters.
type ThrottlingData struct {
	// Number of periods with throttling active.
	Periods uint64 `json:"periods,omitempty"`
	// Number of periods when the container hit its throttling limit.
	ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
	// Aggregate time the container was throttled for, in nanoseconds.
	ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
|
||||
|
||||
// CpuUsage denotes the usage of a CPU.
// All CPU stats are aggregate since container inception.
// Every value is expressed in nanoseconds.
type CpuUsage struct {
	// Total CPU time consumed.
	TotalUsage uint64 `json:"total_usage,omitempty"`
	// Total CPU time consumed per core.
	PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
	// CPU time consumed per core in kernel mode.
	PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"`
	// CPU time consumed per core in user mode.
	PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"`
	// Time spent by tasks of the cgroup in kernel mode.
	UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
	// Time spent by tasks of the cgroup in user mode.
	UsageInUsermode uint64 `json:"usage_in_usermode"`
}
|
||||
|
||||
// PSIData is one pressure stall information record: three averages plus a
// total counter.
type PSIData struct {
	Avg10  float64 `json:"avg10"`
	Avg60  float64 `json:"avg60"`
	Avg300 float64 `json:"avg300"`
	Total  uint64  `json:"total"`
}

// PSIStats groups the "some" and "full" pressure stall records of a
// resource.
type PSIStats struct {
	Some PSIData `json:"some,omitempty"`
	Full PSIData `json:"full,omitempty"`
}
|
||||
|
||||
type CpuStats struct {
|
||||
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
|
||||
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
|
||||
PSI *PSIStats `json:"psi,omitempty"`
|
||||
}
|
||||
|
||||
// CPUSetStats holds the values of the cpuset controller.
type CPUSetStats struct {
	// List of the physical numbers of the CPUs on which processes
	// in that cpuset are allowed to execute.
	CPUs []uint16 `json:"cpus,omitempty"`
	// cpu_exclusive flag.
	CPUExclusive uint64 `json:"cpu_exclusive"`
	// List of memory nodes on which processes in that cpuset
	// are allowed to allocate memory.
	Mems []uint16 `json:"mems,omitempty"`
	// mem_hardwall flag.
	MemHardwall uint64 `json:"mem_hardwall"`
	// mem_exclusive flag.
	MemExclusive uint64 `json:"mem_exclusive"`
	// memory_migrate flag.
	MemoryMigrate uint64 `json:"memory_migrate"`
	// memory_spread page flag.
	MemorySpreadPage uint64 `json:"memory_spread_page"`
	// memory_spread slab flag.
	MemorySpreadSlab uint64 `json:"memory_spread_slab"`
	// memory_pressure.
	MemoryPressure uint64 `json:"memory_pressure"`
	// sched_load balance flag.
	SchedLoadBalance uint64 `json:"sched_load_balance"`
	// sched_relax_domain_level.
	SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"`
}
|
||||
|
||||
// MemoryData holds the counters reported for one kind of memory: current
// usage, maximum usage, failure count and limit.
type MemoryData struct {
	Usage    uint64 `json:"usage,omitempty"`
	MaxUsage uint64 `json:"max_usage,omitempty"`
	Failcnt  uint64 `json:"failcnt"`
	Limit    uint64 `json:"limit"`
}
|
||||
|
||||
type MemoryStats struct {
|
||||
// memory used for cache
|
||||
Cache uint64 `json:"cache,omitempty"`
|
||||
// usage of memory
|
||||
Usage MemoryData `json:"usage,omitempty"`
|
||||
// usage of memory + swap
|
||||
SwapUsage MemoryData `json:"swap_usage,omitempty"`
|
||||
// usage of swap only
|
||||
SwapOnlyUsage MemoryData `json:"swap_only_usage,omitempty"`
|
||||
// usage of kernel memory
|
||||
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
|
||||
// usage of kernel TCP memory
|
||||
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
|
||||
// usage of memory pages by NUMA node
|
||||
// see chapter 5.6 of memory controller documentation
|
||||
PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"`
|
||||
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
|
||||
UseHierarchy bool `json:"use_hierarchy"`
|
||||
|
||||
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||
PSI *PSIStats `json:"psi,omitempty"`
|
||||
}
|
||||
|
||||
// PageUsageByNUMA holds per-NUMA-node page usage, both directly (through
// the embedded PageUsageByNUMAInner) and under Hierarchical.
type PageUsageByNUMA struct {
	// Embedding is used as types can't be recursive.
	PageUsageByNUMAInner
	Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"`
}

// PageUsageByNUMAInner holds page statistics split into total, file, anon
// and unevictable pages.
type PageUsageByNUMAInner struct {
	Total       PageStats `json:"total,omitempty"`
	File        PageStats `json:"file,omitempty"`
	Anon        PageStats `json:"anon,omitempty"`
	Unevictable PageStats `json:"unevictable,omitempty"`
}

// PageStats is a total page count together with a per-node breakdown keyed
// by node number.
type PageStats struct {
	Total uint64           `json:"total,omitempty"`
	Nodes map[uint8]uint64 `json:"nodes,omitempty"`
}
|
||||
|
||||
// PidsStats holds the pids statistics of a cgroup.
type PidsStats struct {
	// Number of pids in the cgroup.
	Current uint64 `json:"current,omitempty"`
	// Active pids hard limit.
	Limit uint64 `json:"limit,omitempty"`
}
|
||||
|
||||
// BlkioStatEntry is a single block I/O statistic: a value recorded for an
// operation on the device identified by its major and minor numbers.
type BlkioStatEntry struct {
	Major uint64 `json:"major,omitempty"`
	Minor uint64 `json:"minor,omitempty"`
	Op    string `json:"op,omitempty"`
	Value uint64 `json:"value,omitempty"`
}
|
||||
|
||||
type BlkioStats struct {
|
||||
// number of bytes transferred to and from the block device
|
||||
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
|
||||
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
|
||||
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
|
||||
IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
|
||||
IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
|
||||
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
|
||||
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
|
||||
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
|
||||
PSI *PSIStats `json:"psi,omitempty"`
|
||||
}
|
||||
|
||||
// HugetlbStats holds the hugetlb statistics for one huge page size.
type HugetlbStats struct {
	// Current res_counter usage for hugetlb.
	Usage uint64 `json:"usage,omitempty"`
	// Maximum usage ever recorded.
	MaxUsage uint64 `json:"max_usage,omitempty"`
	// Number of times hugetlb usage allocation failed.
	Failcnt uint64 `json:"failcnt"`
}
|
||||
|
||||
// RdmaEntry holds the HCA handle and HCA object values of one RDMA device.
type RdmaEntry struct {
	Device     string `json:"device,omitempty"`
	HcaHandles uint32 `json:"hca_handles,omitempty"`
	HcaObjects uint32 `json:"hca_objects,omitempty"`
}

// RdmaStats holds the RDMA limit entries and the current entries of a
// cgroup.
type RdmaStats struct {
	RdmaLimit   []RdmaEntry `json:"rdma_limit,omitempty"`
	RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
}
|
||||
|
||||
// MiscStats holds the statistics of one resource of the misc controller.
type MiscStats struct {
	// Current resource usage for a key in misc.
	Usage uint64 `json:"usage,omitempty"`
	// Number of times the resource usage was about to go over the max boundary.
	Events uint64 `json:"events,omitempty"`
}
|
||||
|
||||
type Stats struct {
|
||||
CpuStats CpuStats `json:"cpu_stats,omitempty"`
|
||||
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
|
||||
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
|
||||
PidsStats PidsStats `json:"pids_stats,omitempty"`
|
||||
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
|
||||
// the map is in the format "size of hugepage: stats of the hugepage"
|
||||
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
|
||||
RdmaStats RdmaStats `json:"rdma_stats,omitempty"`
|
||||
// the map is in the format "misc resource name: stats of the key"
|
||||
MiscStats map[string]MiscStats `json:"misc_stats,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
|
||||
hugetlbStats := make(map[string]HugetlbStats)
|
||||
miscStats := make(map[string]MiscStats)
|
||||
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats}
|
||||
}
|
363
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
363
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
@ -0,0 +1,363 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// defCPUQuotaPeriod is the default kernel value for the cpu quota period:
// 100000 us (100 ms), same for v1 and v2.
// v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and
// v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
const defCPUQuotaPeriod uint64 = 100000
|
||||
|
||||
var (
|
||||
versionOnce sync.Once
|
||||
version int
|
||||
|
||||
isRunningSystemdOnce sync.Once
|
||||
isRunningSystemd bool
|
||||
|
||||
// GenerateDeviceProps is a function to generate systemd device
|
||||
// properties, used by Set methods. Unless
|
||||
// [github.com/opencontainers/runc/libcontainer/cgroups/devices]
|
||||
// package is imported, it is set to nil, so cgroup managers can't
|
||||
// configure devices.
|
||||
GenerateDeviceProps func(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error)
|
||||
)
|
||||
|
||||
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
||||
// It was borrowed here to avoid a dependency on cgo.
|
||||
//
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func IsRunningSystemd() bool {
|
||||
isRunningSystemdOnce.Do(func() {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
isRunningSystemd = err == nil && fi.IsDir()
|
||||
})
|
||||
return isRunningSystemd
|
||||
}
|
||||
|
||||
// ExpandSlice converts a systemd slice name into the path of that slice.
// systemd represents slice hierarchy using `-`, so we need to follow suit
// when generating the path. Essentially, test-a-b.slice becomes
// /test.slice/test-a.slice/test-a-b.slice, and -.slice becomes /.
//
// An error is returned when the name does not end with ".slice", contains a
// path separator, or has an empty component (as in ".slice",
// "test--a.slice" or "-test.slice").
func ExpandSlice(slice string) (string, error) {
	const suffix = ".slice"
	invalid := func() error {
		return fmt.Errorf("invalid slice name: %s", slice)
	}

	// Name has to end with ".slice", and path-separators are not allowed.
	if !strings.HasSuffix(slice, suffix) || strings.Contains(slice, "/") {
		return "", invalid()
	}

	name := strings.TrimSuffix(slice, suffix)
	// If input was -.slice, we should just return root now.
	if name == "-" {
		return "/", nil
	}

	var path strings.Builder
	parent := "" // dash-terminated chain of the components seen so far
	for _, component := range strings.Split(name, "-") {
		if component == "" {
			return "", invalid()
		}
		path.WriteString("/" + parent + component + suffix)
		parent += component + "-"
	}
	return path.String(), nil
}
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||
if !strings.HasSuffix(c.Name, ".slice") {
|
||||
return c.ScopePrefix + "-" + c.Name + ".scope"
|
||||
}
|
||||
return c.Name
|
||||
}
|
||||
|
||||
// getUnitType returns "Slice" for unit names ending in ".slice" and "Scope"
// for everything else.
//
// This code should be in sync with getUnitName.
func getUnitType(unitName string) string {
	unitType := "Scope"
	if strings.HasSuffix(unitName, ".slice") {
		unitType = "Slice"
	}
	return unitType
}
|
||||
|
||||
// isDbusError returns true if the error is a specific dbus error.
|
||||
func isDbusError(err error, name string) bool {
|
||||
if err != nil {
|
||||
var derr dbus.Error
|
||||
if errors.As(err, &derr) {
|
||||
return strings.Contains(derr.Name, name)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
return isDbusError(err, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
|
||||
func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error {
|
||||
statusChan := make(chan string, 1)
|
||||
retry := true
|
||||
|
||||
retry:
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
_, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
if ignoreExist {
|
||||
// TODO: remove this hack.
|
||||
// This is kubelet making sure a slice exists (see
|
||||
// https://github.com/opencontainers/runc/pull/1124).
|
||||
return nil
|
||||
}
|
||||
if retry {
|
||||
// In case a unit with the same name exists, this may
|
||||
// be a leftover failed unit. Reset it, so systemd can
|
||||
// remove it, and retry once.
|
||||
err = resetFailedUnit(cm, unitName)
|
||||
if err != nil {
|
||||
logrus.Warnf("unable to reset failed unit: %v", err)
|
||||
}
|
||||
retry = false
|
||||
goto retry
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
timeout := time.NewTimer(30 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
_ = resetFailedUnit(cm, unitName)
|
||||
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
||||
}
|
||||
case <-timeout.C:
|
||||
_ = resetFailedUnit(cm, unitName)
|
||||
return errors.New("Timeout waiting for systemd to create " + unitName)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func stopUnit(cm *dbusConnManager, unitName string) error {
|
||||
statusChan := make(chan string, 1)
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
_, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan)
|
||||
return err
|
||||
})
|
||||
if err == nil {
|
||||
timeout := time.NewTimer(30 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
|
||||
}
|
||||
case <-timeout.C:
|
||||
return errors.New("Timed out while waiting for systemd to remove " + unitName)
|
||||
}
|
||||
}
|
||||
|
||||
// In case of a failed unit, let systemd remove it.
|
||||
_ = resetFailedUnit(cm, unitName)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func resetFailedUnit(cm *dbusConnManager, name string) error {
|
||||
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
return c.ResetFailedUnitContext(context.TODO(), name)
|
||||
})
|
||||
}
|
||||
|
||||
func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) {
|
||||
var prop *systemdDbus.Property
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) {
|
||||
prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName)
|
||||
return Err
|
||||
})
|
||||
return prop, err
|
||||
}
|
||||
|
||||
func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
|
||||
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
|
||||
})
|
||||
}
|
||||
|
||||
func getManagerProperty(cm *dbusConnManager, name string) (string, error) {
|
||||
str := ""
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
var err error
|
||||
str, err = c.GetManagerProperty(name)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strconv.Unquote(str)
|
||||
}
|
||||
|
||||
func systemdVersion(cm *dbusConnManager) int {
|
||||
versionOnce.Do(func() {
|
||||
version = -1
|
||||
verStr, err := getManagerProperty(cm, "Version")
|
||||
if err == nil {
|
||||
version, err = systemdVersionAtoi(verStr)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logrus.WithError(err).Error("unable to get systemd version")
|
||||
}
|
||||
})
|
||||
|
||||
return version
|
||||
}
|
||||
|
||||
// systemdVersionAtoi extracts the leading numeric systemd version from str.
// Accepted forms include "v245.4-1.fc32", "245", "v245-1.fc32" and
// "245-1.fc32", with or without a leading double quote; all of these yield
// 245. When no number can be parsed, -1 and a wrapped error are returned.
func systemdVersionAtoi(str string) (int, error) {
	// Drop any leading run of `"` and `v` characters.
	digits := strings.TrimLeft(str, `"v`)
	// Keep only the leading run of decimal digits.
	notDigit := func(r rune) bool { return r < '0' || r > '9' }
	if end := strings.IndexFunc(digits, notDigit); end >= 0 {
		digits = digits[:end]
	}

	ver, err := strconv.Atoi(digits)
	if err != nil {
		return -1, fmt.Errorf("can't parse version: %w", err)
	}
	return ver, nil
}
|
||||
|
||||
func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
||||
if period != 0 {
|
||||
// systemd only supports CPUQuotaPeriodUSec since v242
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer >= 242 {
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPeriodUSec", period))
|
||||
} else {
|
||||
logrus.Debugf("systemd v%d is too old to support CPUQuotaPeriodSec "+
|
||||
" (setting will still be applied to cgroupfs)", sdVer)
|
||||
}
|
||||
}
|
||||
if quota != 0 || period != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if quota > 0 {
|
||||
if period == 0 {
|
||||
// assume the default
|
||||
period = defCPUQuotaPeriod
|
||||
}
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
}
|
||||
|
||||
func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error {
|
||||
if cpus == "" && mems == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd only supports AllowedCPUs/AllowedMemoryNodes since v244
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer < 244 {
|
||||
logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+
|
||||
" (settings will still be applied to cgroupfs)", sdVer)
|
||||
return nil
|
||||
}
|
||||
|
||||
if cpus != "" {
|
||||
bits, err := RangeToBits(cpus)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
|
||||
cpus, err)
|
||||
}
|
||||
*props = append(*props,
|
||||
newProp("AllowedCPUs", bits))
|
||||
}
|
||||
if mems != "" {
|
||||
bits, err := RangeToBits(mems)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
|
||||
mems, err)
|
||||
}
|
||||
*props = append(*props,
|
||||
newProp("AllowedMemoryNodes", bits))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateDeviceProperties takes the configured device rules and generates a
|
||||
// corresponding set of systemd properties to configure the devices correctly.
|
||||
func generateDeviceProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
if GenerateDeviceProps == nil {
|
||||
if len(r.Devices) > 0 {
|
||||
return nil, cgroups.ErrDevicesUnsupported
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return GenerateDeviceProps(r, systemdVersion(cm))
|
||||
}
|
60
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
Normal file
60
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math/big"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// RangeToBits converts a text representation of a CPU mask (as written to
// or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes
// with the corresponding bits set (as consumed by systemd over dbus as
// AllowedCPUs/AllowedMemoryNodes unit property value).
//
// Elements are comma-separated single numbers or "start-end" ranges; spaces
// around an element and empty elements are tolerated. An error is returned
// for unparsable numbers, for a range whose start exceeds its end, and when
// no bit ends up set. The returned bytes are ordered least-significant
// byte first.
func RangeToBits(str string) ([]byte, error) {
	mask := new(big.Int)
	parse := func(s string) (uint64, error) {
		return strconv.ParseUint(s, 10, 32)
	}

	for _, item := range strings.Split(str, ",") {
		// allow extra spaces around, and empty elements (extra commas)
		item = strings.TrimSpace(item)
		if item == "" {
			continue
		}

		lo, hi, isRange := strings.Cut(item, "-")
		first, err := parse(lo)
		if err != nil {
			return nil, err
		}
		// A single value is treated as the range [first, first].
		last := first
		if isRange {
			if last, err = parse(hi); err != nil {
				return nil, err
			}
			if first > last {
				return nil, errors.New("invalid range: " + item)
			}
		}
		for bit := first; bit <= last; bit++ {
			mask.SetBit(mask, int(bit), 1)
		}
	}

	out := mask.Bytes()
	if len(out) == 0 {
		// do not allow empty values
		return nil, errors.New("empty value")
	}

	// big.Int.Bytes is big-endian; reverse it to fit the cpuset parsing
	// order in systemd.
	for i := 0; i < len(out)/2; i++ {
		j := len(out) - 1 - i
		out[i], out[j] = out[j], out[i]
	}
	return out, nil
}
|
102
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
Normal file
102
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
Normal file
@ -0,0 +1,102 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
)
|
||||
|
||||
var (
|
||||
dbusC *systemdDbus.Conn
|
||||
dbusMu sync.RWMutex
|
||||
dbusInited bool
|
||||
dbusRootless bool
|
||||
)
|
||||
|
||||
type dbusConnManager struct{}
|
||||
|
||||
// newDbusConnManager initializes systemd dbus connection manager.
|
||||
func newDbusConnManager(rootless bool) *dbusConnManager {
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if dbusInited && rootless != dbusRootless {
|
||||
panic("can't have both root and rootless dbus")
|
||||
}
|
||||
dbusInited = true
|
||||
dbusRootless = rootless
|
||||
return &dbusConnManager{}
|
||||
}
|
||||
|
||||
// getConnection lazily initializes and returns systemd dbus connection.
|
||||
func (d *dbusConnManager) getConnection() (*systemdDbus.Conn, error) {
|
||||
// In the case where dbusC != nil
|
||||
// Use the read lock the first time to ensure
|
||||
// that Conn can be acquired at the same time.
|
||||
dbusMu.RLock()
|
||||
if conn := dbusC; conn != nil {
|
||||
dbusMu.RUnlock()
|
||||
return conn, nil
|
||||
}
|
||||
dbusMu.RUnlock()
|
||||
|
||||
// In the case where dbusC == nil
|
||||
// Use write lock to ensure that only one
|
||||
// will be created
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if conn := dbusC; conn != nil {
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
conn, err := d.newConnection()
|
||||
if err != nil {
|
||||
// When dbus-user-session is not installed, we can't detect whether we should try to connect to user dbus or system dbus, so d.dbusRootless is set to false.
|
||||
// This may fail with a cryptic error "read unix @->/run/systemd/private: read: connection reset by peer: unknown."
|
||||
// https://github.com/moby/moby/issues/42793
|
||||
return nil, fmt.Errorf("failed to connect to dbus (hint: for rootless containers, maybe you need to install dbus-user-session package, see https://github.com/opencontainers/runc/blob/master/docs/cgroup-v2.md): %w", err)
|
||||
}
|
||||
dbusC = conn
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) {
|
||||
if dbusRootless {
|
||||
return newUserSystemdDbus()
|
||||
}
|
||||
return systemdDbus.NewWithContext(context.TODO())
|
||||
}
|
||||
|
||||
// resetConnection resets the connection to its initial state
|
||||
// (so it can be reconnected if necessary).
|
||||
func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) {
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if dbusC != nil && dbusC == conn {
|
||||
dbusC.Close()
|
||||
dbusC = nil
|
||||
}
|
||||
}
|
||||
|
||||
// retryOnDisconnect calls op, and if the error it returns is about closed dbus
|
||||
// connection, the connection is re-established and the op is retried. This helps
|
||||
// with the situation when dbus is restarted and we have a stale connection.
|
||||
func (d *dbusConnManager) retryOnDisconnect(op func(*systemdDbus.Conn) error) error {
|
||||
for {
|
||||
conn, err := d.getConnection()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = op(conn)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if !errors.Is(err, dbus.ErrClosed) {
|
||||
return err
|
||||
}
|
||||
d.resetConnection(conn)
|
||||
}
|
||||
}
|
74
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/devices.go
generated
vendored
Normal file
74
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/devices.go
generated
vendored
Normal file
@ -0,0 +1,74 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// freezeBeforeSet answers whether there is a need to freeze the cgroup before
|
||||
// applying its systemd unit properties, and thaw after, while avoiding
|
||||
// unnecessary freezer state changes.
|
||||
//
|
||||
// The reason why we have to freeze is that systemd's application of device
|
||||
// rules is done disruptively, resulting in spurious errors to common devices
|
||||
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||
// containers). So we have to freeze the container to avoid the container get
|
||||
// an occasional "permission denied" error.
|
||||
func (m *LegacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (needsFreeze, needsThaw bool, err error) {
|
||||
// Special case for SkipDevices, as used by Kubernetes to create pod
|
||||
// cgroups with allow-all device policy).
|
||||
if r.SkipDevices {
|
||||
if r.SkipFreezeOnSet {
|
||||
// Both needsFreeze and needsThaw are false.
|
||||
return
|
||||
}
|
||||
|
||||
// No need to freeze if SkipDevices is set, and either
|
||||
// (1) systemd unit does not (yet) exist, or
|
||||
// (2) it has DevicePolicy=auto and empty DeviceAllow list.
|
||||
//
|
||||
// Interestingly, (1) and (2) are the same here because
|
||||
// a non-existent unit returns default properties,
|
||||
// and settings in (2) are the defaults.
|
||||
//
|
||||
// Do not return errors from getUnitTypeProperty, as they alone
|
||||
// should not prevent Set from working.
|
||||
|
||||
unitType := getUnitType(unitName)
|
||||
|
||||
devPolicy, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DevicePolicy")
|
||||
if e == nil && devPolicy.Value == dbus.MakeVariant("auto") {
|
||||
devAllow, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DeviceAllow")
|
||||
if e == nil {
|
||||
if rv := reflect.ValueOf(devAllow.Value.Value()); rv.Kind() == reflect.Slice && rv.Len() == 0 {
|
||||
needsFreeze = false
|
||||
needsThaw = false
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
needsFreeze = true
|
||||
needsThaw = true
|
||||
|
||||
// Check the current freezer state.
|
||||
freezerState, err := m.GetFreezerState()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if freezerState == configs.Frozen {
|
||||
// Already frozen, and should stay frozen.
|
||||
needsFreeze = false
|
||||
needsThaw = false
|
||||
}
|
||||
|
||||
if r.Freezer == configs.Frozen {
|
||||
// Will be frozen anyway -- no need to thaw.
|
||||
needsThaw = false
|
||||
}
|
||||
return
|
||||
}
|
93
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
93
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/moby/sys/userns"
|
||||
)
|
||||
|
||||
// newUserSystemdDbus creates a connection for systemd user-instance.
|
||||
func newUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||
addr, err := DetectUserDbusSessionBusAddress()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
uid, err := DetectUID()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
|
||||
conn, err := dbus.Dial(addr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while dialing %q: %w", addr, err)
|
||||
}
|
||||
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
|
||||
err = conn.Auth(methods)
|
||||
if err != nil {
|
||||
conn.Close()
|
||||
return nil, fmt.Errorf("error while authenticating connection (address=%q, UID=%d): %w", addr, uid, err)
|
||||
}
|
||||
if err = conn.Hello(); err != nil {
|
||||
conn.Close()
|
||||
return nil, fmt.Errorf("error while sending Hello message (address=%q, UID=%d): %w", addr, uid, err)
|
||||
}
|
||||
return conn, nil
|
||||
})
|
||||
}
|
||||
|
||||
// DetectUID detects UID from the OwnerUID field of `busctl --user status`
|
||||
// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) .
|
||||
//
|
||||
// Otherwise returns os.Getuid() .
|
||||
func DetectUID() (int, error) {
|
||||
if !userns.RunningInUserNS() {
|
||||
return os.Getuid(), nil
|
||||
}
|
||||
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("could not execute `busctl --user --no-pager status` (output: %q): %w", string(b), err)
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(s, "OwnerUID=") {
|
||||
uidStr := strings.TrimPrefix(s, "OwnerUID=")
|
||||
i, err := strconv.Atoi(uidStr)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("could not detect the OwnerUID: %w", err)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return -1, errors.New("could not detect the OwnerUID")
|
||||
}
|
||||
|
||||
// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS, if set.
|
||||
// Otherwise it returns "unix:path=$XDG_RUNTIME_DIR/bus", if $XDG_RUNTIME_DIR/bus exists.
|
||||
func DetectUserDbusSessionBusAddress() (string, error) {
|
||||
if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" {
|
||||
return env, nil
|
||||
}
|
||||
if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" {
|
||||
busPath := filepath.Join(xdr, "bus")
|
||||
if _, err := os.Stat(busPath); err == nil {
|
||||
busAddress := "unix:path=" + dbus.EscapeBusAddressValue(busPath)
|
||||
return busAddress, nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from the environment; make sure you have installed the dbus-user-session or dbus-daemon package; note you may need to re-login")
|
||||
}
|
413
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
Normal file
413
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
Normal file
@ -0,0 +1,413 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type LegacyManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
dbus *dbusConnManager
|
||||
}
|
||||
|
||||
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) (*LegacyManager, error) {
|
||||
if cg.Rootless {
|
||||
return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1")
|
||||
}
|
||||
if cg.Resources != nil && cg.Resources.Unified != nil {
|
||||
return nil, cgroups.ErrV1NoUnified
|
||||
}
|
||||
if paths == nil {
|
||||
var err error
|
||||
paths, err = initPaths(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return &LegacyManager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
dbus: newDbusConnManager(false),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Set sets cgroup resource limits.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
var legacySubsystems = []subsystem{
|
||||
&fs.CpusetGroup{},
|
||||
&fs.DevicesGroup{},
|
||||
&fs.MemoryGroup{},
|
||||
&fs.CpuGroup{},
|
||||
&fs.CpuacctGroup{},
|
||||
&fs.PidsGroup{},
|
||||
&fs.BlkioGroup{},
|
||||
&fs.HugetlbGroup{},
|
||||
&fs.PerfEventGroup{},
|
||||
&fs.FreezerGroup{},
|
||||
&fs.NetPrioGroup{},
|
||||
&fs.NetClsGroup{},
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
&fs.RdmaGroup{},
|
||||
&fs.NameGroup{GroupName: "misc"},
|
||||
}
|
||||
|
||||
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
|
||||
deviceProperties, err := generateDeviceProperties(r, cm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(r.Memory)))
|
||||
}
|
||||
|
||||
if r.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", r.CpuShares))
|
||||
}
|
||||
|
||||
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(r.BlkioWeight)))
|
||||
}
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// initPaths figures out and returns paths to cgroups.
|
||||
func initPaths(c *configs.Cgroup) (map[string]string, error) {
|
||||
slice := "system.slice"
|
||||
if c.Parent != "" {
|
||||
var err error
|
||||
slice, err = ExpandSlice(c.Parent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
unit := getUnitName(c)
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range legacySubsystems {
|
||||
subsystemPath, err := getSubsystemPath(slice, unit, s.Name())
|
||||
if err != nil {
|
||||
// Even if it's `not found` error, we'll return err
|
||||
// because devices cgroup is hard requirement for
|
||||
// container security.
|
||||
if s.Name() == "devices" {
|
||||
return nil, err
|
||||
}
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
|
||||
// If systemd is using cgroups-hybrid mode then add the slice path of
|
||||
// this container to the paths so the following process executed with
|
||||
// "runc exec" joins that cgroup as well.
|
||||
if cgroups.IsCgroup2HybridMode() {
|
||||
// "" means cgroup-hybrid path
|
||||
cgroupsHybridPath, err := getSubsystemPath(slice, unit, "")
|
||||
if err != nil && cgroups.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
paths[""] = cgroupsHybridPath
|
||||
}
|
||||
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// If we create a slice, the parent is defined via a Wants=.
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// Otherwise it's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
// Assume scopes always support delegation (supported since systemd v218).
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true),
|
||||
newProp("TasksAccounting", true),
|
||||
)
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := m.joinCgroups(pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))
|
||||
|
||||
// Both on success and on error, cleanup all the cgroups
|
||||
// we are aware of, as some of them were created directly
|
||||
// by Apply() and are not managed by systemd.
|
||||
if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return stopErr
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func (m *LegacyManager) joinCgroups(pid int) error {
|
||||
for _, sys := range legacySubsystems {
|
||||
name := sys.Name()
|
||||
switch name {
|
||||
case "name=systemd":
|
||||
// let systemd handle this
|
||||
case "cpuset":
|
||||
if path, ok := m.paths[name]; ok {
|
||||
s := &fs.CpusetGroup{}
|
||||
if err := s.ApplyDir(path, m.cgroups.Resources, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
if path, ok := m.paths[name]; ok {
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getSubsystemPath(slice, unit, subsystem string) (string, error) {
|
||||
mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mountpoint, slice, unit), nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Freeze(state configs.FreezerState) error {
|
||||
err := m.doFreeze(state)
|
||||
if err == nil {
|
||||
m.cgroups.Resources.Freezer = state
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// doFreeze is the same as Freeze but without
|
||||
// changing the m.cgroups.Resources.Frozen field.
|
||||
func (m *LegacyManager) doFreeze(state configs.FreezerState) error {
|
||||
path, ok := m.paths["freezer"]
|
||||
if !ok {
|
||||
return errSubsystemDoesNotExist
|
||||
}
|
||||
freezer := &fs.FreezerGroup{}
|
||||
resources := &configs.Resources{Freezer: state}
|
||||
return freezer.Set(path, resources)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPids() ([]int, error) {
|
||||
path, ok := m.paths["devices"]
|
||||
if !ok {
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetAllPids() ([]int, error) {
|
||||
path, ok := m.paths["devices"]
|
||||
if !ok {
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for _, sys := range legacySubsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
properties, err := genV1ResourcesProperties(r, m.dbus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
unitName := getUnitName(m.cgroups)
|
||||
needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if needsFreeze {
|
||||
if err := m.doFreeze(configs.Frozen); err != nil {
|
||||
// If freezer cgroup isn't supported, we just warn about it.
|
||||
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||
// skip update the cgroup while frozen failed. #3803
|
||||
if !errors.Is(err, errSubsystemDoesNotExist) {
|
||||
if needsThaw {
|
||||
if thawErr := m.doFreeze(configs.Thawed); thawErr != nil {
|
||||
logrus.Infof("thaw container after doFreeze failed: %v", thawErr)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
setErr := setUnitProperties(m.dbus, unitName, properties...)
|
||||
if needsThaw {
|
||||
if err := m.doFreeze(configs.Thawed); err != nil {
|
||||
logrus.Infof("thaw container after SetUnitProperties failed: %v", err)
|
||||
}
|
||||
}
|
||||
if setErr != nil {
|
||||
return setErr
|
||||
}
|
||||
|
||||
for _, sys := range legacySubsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, ok := m.paths[sys.Name()]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
path, ok := m.paths["freezer"]
|
||||
if !ok {
|
||||
return configs.Undefined, nil
|
||||
}
|
||||
freezer := &fs.FreezerGroup{}
|
||||
return freezer.GetState(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *LegacyManager) OOMKillCount() (uint64, error) {
|
||||
return fs.OOMKillCount(m.Path("memory"))
|
||||
}
|
516
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
516
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
@ -0,0 +1,516 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// cpuIdleSupportedVersion is the lowest systemd version for which this
// package translates cpu.idle=1 into a systemd property.
const cpuIdleSupportedVersion = 252
|
||||
|
||||
type UnifiedManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
path string
|
||||
dbus *dbusConnManager
|
||||
fsMgr cgroups.Manager
|
||||
}
|
||||
|
||||
func NewUnifiedManager(config *configs.Cgroup, path string) (*UnifiedManager, error) {
|
||||
m := &UnifiedManager{
|
||||
cgroups: config,
|
||||
path: path,
|
||||
dbus: newDbusConnManager(config.Rootless),
|
||||
}
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fsMgr, err := fs2.NewManager(config, m.path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m.fsMgr = fsMgr
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func shouldSetCPUIdle(cm *dbusConnManager, v string) bool {
|
||||
// The only valid values for cpu.idle are 0 and 1. As it is
|
||||
// not possible to directly set cpu.idle to 0 via systemd,
|
||||
// ignore 0. Ignore other values as we'll error out later
|
||||
// in Set() while calling fsMgr.Set().
|
||||
return v == "1" && systemdVersion(cm) >= cpuIdleSupportedVersion
|
||||
}
|
||||
|
||||
// unifiedResToSystemdProps tries to convert from Cgroup.Resources.Unified
|
||||
// key/value map (where key is cgroupfs file name) to systemd unit properties.
|
||||
// This is on a best-effort basis, so the properties that are not known
|
||||
// (to this function and/or systemd) are ignored (but logged with "debug"
|
||||
// log level).
|
||||
//
|
||||
// For the list of keys, see https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
//
|
||||
// For the list of systemd unit properties, see systemd.resource-control(5).
|
||||
func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props []systemdDbus.Property, _ error) {
|
||||
var err error
|
||||
|
||||
for k, v := range res {
|
||||
if strings.Contains(k, "/") {
|
||||
return nil, fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
|
||||
}
|
||||
if strings.IndexByte(k, '.') <= 0 {
|
||||
return nil, fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k)
|
||||
}
|
||||
// Kernel is quite forgiving to extra whitespace
|
||||
// around the value, and so should we.
|
||||
v = strings.TrimSpace(v)
|
||||
// Please keep cases in alphabetical order.
|
||||
switch k {
|
||||
case "cpu.idle":
|
||||
if shouldSetCPUIdle(cm, v) {
|
||||
// Setting CPUWeight to 0 tells systemd
|
||||
// to set cpu.idle to 1.
|
||||
props = append(props,
|
||||
newProp("CPUWeight", uint64(0)))
|
||||
}
|
||||
|
||||
case "cpu.max":
|
||||
// value: quota [period]
|
||||
quota := int64(0) // 0 means "unlimited" for addCpuQuota, if period is set
|
||||
period := defCPUQuotaPeriod
|
||||
sv := strings.Fields(v)
|
||||
if len(sv) < 1 || len(sv) > 2 {
|
||||
return nil, fmt.Errorf("unified resource %q value invalid: %q", k, v)
|
||||
}
|
||||
// quota
|
||||
if sv[0] != "max" {
|
||||
quota, err = strconv.ParseInt(sv[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q period value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
// period
|
||||
if len(sv) == 2 {
|
||||
period, err = strconv.ParseUint(sv[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
addCpuQuota(cm, &props, quota, period)
|
||||
|
||||
case "cpu.weight":
|
||||
if shouldSetCPUIdle(cm, strings.TrimSpace(res["cpu.idle"])) {
|
||||
// Do not add duplicate CPUWeight property
|
||||
// (see case "cpu.idle" above).
|
||||
logrus.Warn("unable to apply both cpu.weight and cpu.idle to systemd, ignoring cpu.weight")
|
||||
continue
|
||||
}
|
||||
num, err := strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||
}
|
||||
props = append(props,
|
||||
newProp("CPUWeight", num))
|
||||
|
||||
case "cpuset.cpus", "cpuset.mems":
|
||||
bits, err := RangeToBits(v)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err)
|
||||
}
|
||||
m := map[string]string{
|
||||
"cpuset.cpus": "AllowedCPUs",
|
||||
"cpuset.mems": "AllowedMemoryNodes",
|
||||
}
|
||||
// systemd only supports these properties since v244
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer >= 244 {
|
||||
props = append(props,
|
||||
newProp(m[k], bits))
|
||||
} else {
|
||||
logrus.Debugf("systemd v%d is too old to support %s"+
|
||||
" (setting will still be applied to cgroupfs)",
|
||||
sdVer, m[k])
|
||||
}
|
||||
|
||||
case "memory.high", "memory.low", "memory.min", "memory.max", "memory.swap.max":
|
||||
num := uint64(math.MaxUint64)
|
||||
if v != "max" {
|
||||
num, err = strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
m := map[string]string{
|
||||
"memory.high": "MemoryHigh",
|
||||
"memory.low": "MemoryLow",
|
||||
"memory.min": "MemoryMin",
|
||||
"memory.max": "MemoryMax",
|
||||
"memory.swap.max": "MemorySwapMax",
|
||||
}
|
||||
props = append(props,
|
||||
newProp(m[k], num))
|
||||
|
||||
case "pids.max":
|
||||
num := uint64(math.MaxUint64)
|
||||
if v != "max" {
|
||||
var err error
|
||||
num, err = strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
props = append(props,
|
||||
newProp("TasksMax", num))
|
||||
|
||||
case "memory.oom.group":
|
||||
// Setting this to 1 is roughly equivalent to OOMPolicy=kill
|
||||
// (as per systemd.service(5) and
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html),
|
||||
// but it's not clear what to do if it is unset or set
|
||||
// to 0 in runc update, as there are two other possible
|
||||
// values for OOMPolicy (continue/stop).
|
||||
fallthrough
|
||||
|
||||
default:
|
||||
// Ignore the unknown resource here -- will still be
|
||||
// applied in Set which calls fs2.Set.
|
||||
logrus.Debugf("don't know how to convert unified resource %q=%q to systemd unit property; skipping (will still be applied to cgroupfs)", k, v)
|
||||
}
|
||||
}
|
||||
|
||||
return props, nil
|
||||
}
|
||||
|
||||
func genV2ResourcesProperties(dirPath string, r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
// We need this check before setting systemd properties, otherwise
|
||||
// the container is OOM-killed and the systemd unit is removed
|
||||
// before we get to fsMgr.Set().
|
||||
if err := fs2.CheckMemoryUsage(dirPath, r); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var properties []systemdDbus.Property
|
||||
|
||||
// NOTE: This is of questionable correctness because we insert our own
|
||||
// devices eBPF program later. Two programs with identical rules
|
||||
// aren't the end of the world, but it is a bit concerning. However
|
||||
// it's unclear if systemd removes all eBPF programs attached when
|
||||
// doing SetUnitProperties...
|
||||
deviceProperties, err := generateDeviceProperties(r, cm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryMax", uint64(r.Memory)))
|
||||
}
|
||||
if r.MemoryReservation != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLow", uint64(r.MemoryReservation)))
|
||||
}
|
||||
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if swap != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemorySwapMax", uint64(swap)))
|
||||
}
|
||||
|
||||
idleSet := false
|
||||
// The logic here is the same as in shouldSetCPUIdle.
|
||||
if r.CPUIdle != nil && *r.CPUIdle == 1 && systemdVersion(cm) >= cpuIdleSupportedVersion {
|
||||
properties = append(properties,
|
||||
newProp("CPUWeight", uint64(0)))
|
||||
idleSet = true
|
||||
}
|
||||
if r.CpuWeight != 0 {
|
||||
if idleSet {
|
||||
// Ignore CpuWeight if CPUIdle is already set.
|
||||
logrus.Warn("unable to apply both CPUWeight and CpuIdle to systemd, ignoring CPUWeight")
|
||||
} else {
|
||||
properties = append(properties,
|
||||
newProp("CPUWeight", r.CpuWeight))
|
||||
}
|
||||
}
|
||||
|
||||
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// ignore r.KernelMemory
|
||||
|
||||
// convert Resources.Unified map to systemd properties
|
||||
if r.Unified != nil {
|
||||
unifiedProps, err := unifiedResToSystemdProps(cm, r.Unified)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, unifiedProps...)
|
||||
}
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
slice := "system.slice"
|
||||
if m.cgroups.Rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// If we create a slice, the parent is defined via a Wants=.
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// Otherwise it's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
// Assume scopes always support delegation (supported since systemd v218).
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("IOAccounting", true),
|
||||
newProp("TasksAccounting", true),
|
||||
)
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
|
||||
return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err)
|
||||
}
|
||||
|
||||
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.OwnerUID != nil {
|
||||
// The directory itself must be chowned.
|
||||
err := os.Chown(m.path, *c.OwnerUID, -1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
filesToChown, err := cgroupFilesToChown()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, v := range filesToChown {
|
||||
err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
|
||||
// Some files might not be present.
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cgroupFilesToChown returns the names of the cgroup files that should be
// chowned to the delegate uid. The kernel exposes that list, one name per
// line, in /sys/kernel/cgroup/delegate. If the file cannot be opened
// (e.g. Linux < 4.15), the initial values mentioned in cgroups(7) are
// returned instead. A read error while scanning the file is reported.
func cgroupFilesToChown() ([]string, error) {
	const cgroupDelegateFile = "/sys/kernel/cgroup/delegate"

	delegate, openErr := os.Open(cgroupDelegateFile)
	if openErr != nil {
		return []string{"cgroup.procs", "cgroup.subtree_control", "cgroup.threads"}, nil
	}
	defer delegate.Close()

	names := []string{}
	lines := bufio.NewScanner(delegate)
	for lines.Scan() {
		names = append(names, lines.Text())
	}
	if scanErr := lines.Err(); scanErr != nil {
		return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, scanErr)
	}

	return names, nil
}
|
||||
|
||||
func (m *UnifiedManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
unitName := getUnitName(m.cgroups)
|
||||
if err := stopUnit(m.dbus, unitName); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// systemd 239 do not remove sub-cgroups.
|
||||
err := m.fsMgr.Destroy()
|
||||
// fsMgr.Destroy has handled ErrNotExist
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Path(_ string) string {
|
||||
return m.path
|
||||
}
|
||||
|
||||
// getSliceFull value is used in initPath.
|
||||
// The value is incompatible with systemdDbus.PropSlice.
|
||||
func (m *UnifiedManager) getSliceFull() (string, error) {
|
||||
c := m.cgroups
|
||||
slice := "system.slice"
|
||||
if c.Rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
var err error
|
||||
slice, err = ExpandSlice(c.Parent)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
if c.Rootless {
|
||||
// managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service".
|
||||
managerCG, err := getManagerProperty(m.dbus, "ControlGroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
slice = filepath.Join(managerCG, slice)
|
||||
}
|
||||
|
||||
// an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice"
|
||||
// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified.
|
||||
return slice, nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) initPath() error {
|
||||
if m.path != "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
sliceFull, err := m.getSliceFull()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c := m.cgroups
|
||||
path := filepath.Join(sliceFull, getUnitName(c))
|
||||
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// an example of the final path in rootless:
|
||||
// "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
|
||||
m.path = path
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Freeze(state configs.FreezerState) error {
|
||||
return m.fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
return m.fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
properties, err := genV2ResourcesProperties(m.fsMgr.Path(""), r, m.dbus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
|
||||
return fmt.Errorf("unable to set unit properties: %w", err)
|
||||
}
|
||||
|
||||
return m.fsMgr.Set(r)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPaths() map[string]string {
|
||||
paths := make(map[string]string, 1)
|
||||
paths[""] = m.path
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
return m.fsMgr.GetFreezerState()
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Exists() bool {
|
||||
return cgroups.PathExists(m.path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) OOMKillCount() (uint64, error) {
|
||||
return m.fsMgr.OOMKillCount()
|
||||
}
|
456
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
Normal file
456
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
Normal file
@ -0,0 +1,456 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/moby/sys/userns"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// CgroupProcesses is the name of the per-cgroup file listing process IDs.
const CgroupProcesses = "cgroup.procs"

const (
	// unifiedMountpoint is the path statfs'ed to detect cgroup v2 unified mode.
	unifiedMountpoint = "/sys/fs/cgroup"
	// hybridMountpoint is the path statfs'ed to detect cgroup v2 hybrid mode.
	hybridMountpoint = "/sys/fs/cgroup/unified"
)
|
||||
|
||||
// Cached result of the unified-mode probe and the Once guarding it.
var (
	isUnifiedOnce sync.Once
	isUnified     bool
)

// Cached result of the hybrid-mode probe and the Once guarding it.
var (
	isHybridOnce sync.Once
	isHybrid     bool
)
|
||||
|
||||
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
|
||||
func IsCgroup2UnifiedMode() bool {
|
||||
isUnifiedOnce.Do(func() {
|
||||
var st unix.Statfs_t
|
||||
err := unix.Statfs(unifiedMountpoint, &st)
|
||||
if err != nil {
|
||||
level := logrus.WarnLevel
|
||||
if os.IsNotExist(err) && userns.RunningInUserNS() {
|
||||
// For rootless containers, sweep it under the rug.
|
||||
level = logrus.DebugLevel
|
||||
}
|
||||
logrus.StandardLogger().Logf(level,
|
||||
"statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err)
|
||||
}
|
||||
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isUnified
|
||||
}
|
||||
|
||||
// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode.
|
||||
func IsCgroup2HybridMode() bool {
|
||||
isHybridOnce.Do(func() {
|
||||
var st unix.Statfs_t
|
||||
err := unix.Statfs(hybridMountpoint, &st)
|
||||
if err != nil {
|
||||
isHybrid = false
|
||||
if !os.IsNotExist(err) {
|
||||
// Report unexpected errors.
|
||||
logrus.WithError(err).Debugf("statfs(%q) failed", hybridMountpoint)
|
||||
}
|
||||
return
|
||||
}
|
||||
isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isHybrid
|
||||
}
|
||||
|
||||
// Mount describes one cgroup mount as returned by GetCgroupMounts.
type Mount struct {
	// Mountpoint is where the hierarchy is mounted.
	Mountpoint string
	// Root is the root of the mount. NOTE(review): presumably the mount's
	// root within the cgroup hierarchy; for cgroup v2 GetCgroupMounts sets
	// it to the same value as Mountpoint.
	Root string
	// Subsystems lists the controllers available on this mount.
	Subsystems []string
}
|
||||
|
||||
// GetCgroupMounts returns the mounts for the cgroup subsystems.
|
||||
// all indicates whether to return just the first instance or all the mounts.
|
||||
// This function should not be used from cgroupv2 code, as in this case
|
||||
// all the controllers are available under the constant unifiedMountpoint.
|
||||
func GetCgroupMounts(all bool) ([]Mount, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// TODO: remove cgroupv2 case once all external users are converted
|
||||
availableControllers, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Mount{
|
||||
Mountpoint: unifiedMountpoint,
|
||||
Root: unifiedMountpoint,
|
||||
Subsystems: availableControllers,
|
||||
}
|
||||
return []Mount{m}, nil
|
||||
}
|
||||
|
||||
return getCgroupMountsV1(all)
|
||||
}
|
||||
|
||||
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
|
||||
func GetAllSubsystems() ([]string, error) {
|
||||
// /proc/cgroups is meaningless for v2
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
|
||||
// - devices: implemented in kernel 4.15
|
||||
// - freezer: implemented in kernel 5.2
|
||||
// We assume these are always available, as it is hard to detect availability.
|
||||
pseudo := []string{"devices", "freezer"}
|
||||
data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
subsystems := append(pseudo, strings.Fields(data)...)
|
||||
return subsystems, nil
|
||||
}
|
||||
f, err := os.Open("/proc/cgroups")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
subsystems := []string{}
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
if text[0] != '#' {
|
||||
parts := strings.Fields(text)
|
||||
if len(parts) >= 4 && parts[3] != "0" {
|
||||
subsystems = append(subsystems, parts[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return subsystems, nil
|
||||
}
|
||||
|
||||
func readProcsFile(dir string) (out []int, _ error) {
|
||||
file := CgroupProcesses
|
||||
retry := true
|
||||
|
||||
again:
|
||||
f, err := OpenFile(dir, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
if errors.Is(s.Err(), unix.ENOTSUP) && retry {
|
||||
// For a threaded cgroup, read returns ENOTSUP, and we should
|
||||
// read from cgroup.threads instead.
|
||||
file = "cgroup.threads"
|
||||
retry = false
|
||||
goto again
|
||||
}
|
||||
return out, s.Err()
|
||||
}
|
||||
|
||||
// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
|
||||
// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
|
||||
//
|
||||
// "cpu": "/user.slice/user-1000.slice"
|
||||
// "pids": "/user.slice/user-1000.slice"
|
||||
//
|
||||
// etc.
|
||||
//
|
||||
// Note that for cgroup v2 unified hierarchy, there are no per-controller
|
||||
// cgroup paths, so the resulting map will have a single element where the key
|
||||
// is empty string ("") and the value is the cgroup path the <pid> is in.
|
||||
func ParseCgroupFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
// parseCgroupFromReader does the actual work of ParseCgroupFile on an
// arbitrary reader, which makes it easy to test. Every line must have
// the form
//
//	hierarchy-ID:subsystem-list:cgroup-path
//
// (see cgroups(7)); each comma-separated subsystem is mapped to the
// cgroup path. A line with fewer than two colons is an error.
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
	paths := make(map[string]string)

	sc := bufio.NewScanner(r)
	for sc.Scan() {
		line := sc.Text()

		// Drop the hierarchy ID, then split the subsystem list from the
		// path. Only the first two colons are separators; any further
		// colon belongs to the path.
		_, rest, ok := strings.Cut(line, ":")
		var subsystems, cgPath string
		if ok {
			subsystems, cgPath, ok = strings.Cut(rest, ":")
		}
		if !ok {
			return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", line)
		}

		for _, name := range strings.Split(subsystems, ",") {
			paths[name] = cgPath
		}
	}
	if err := sc.Err(); err != nil {
		return nil, err
	}

	return paths, nil
}
|
||||
|
||||
// PathExists reports whether os.Stat succeeds on path. Any stat error,
// not only "does not exist", is reported as false.
func PathExists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}
|
||||
|
||||
// rmdir tries to remove a directory, optionally retrying on EBUSY.
|
||||
func rmdir(path string, retry bool) error {
|
||||
delay := time.Millisecond
|
||||
tries := 10
|
||||
|
||||
again:
|
||||
err := unix.Rmdir(path)
|
||||
switch err { // nolint:errorlint // unix errors are bare
|
||||
case nil, unix.ENOENT:
|
||||
return nil
|
||||
case unix.EINTR:
|
||||
goto again
|
||||
case unix.EBUSY:
|
||||
if retry && tries > 0 {
|
||||
time.Sleep(delay)
|
||||
delay *= 2
|
||||
tries--
|
||||
goto again
|
||||
|
||||
}
|
||||
}
|
||||
return &os.PathError{Op: "rmdir", Path: path, Err: err}
|
||||
}
|
||||
|
||||
// RemovePath aims to remove cgroup path. It does so recursively,
|
||||
// by removing any subdirectories (sub-cgroups) first.
|
||||
func RemovePath(path string) error {
|
||||
// Try the fast path first.
|
||||
if err := rmdir(path, false); err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
infos, err := os.ReadDir(path)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
for _, info := range infos {
|
||||
if info.IsDir() {
|
||||
// We should remove subcgroup first.
|
||||
if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
err = rmdir(path, true)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// RemovePaths iterates over the provided paths removing them.
|
||||
func RemovePaths(paths map[string]string) (err error) {
|
||||
for s, p := range paths {
|
||||
if err := RemovePath(p); err == nil {
|
||||
delete(paths, s)
|
||||
}
|
||||
}
|
||||
if len(paths) == 0 {
|
||||
clear(paths)
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("Failed to remove paths: %v", paths)
|
||||
}
|
||||
|
||||
// hugePageSizes caches the result computed by HugePageSizes.
var hugePageSizes []string

// initHPSOnce guards the one-time initialization of hugePageSizes.
var initHPSOnce sync.Once
|
||||
|
||||
func HugePageSizes() []string {
|
||||
initHPSOnce.Do(func() {
|
||||
dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
files, err := dir.Readdirnames(0)
|
||||
dir.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
hugePageSizes, err = getHugePageSizeFromFilenames(files)
|
||||
if err != nil {
|
||||
logrus.Warn("HugePageSizes: ", err)
|
||||
}
|
||||
})
|
||||
|
||||
return hugePageSizes
|
||||
}
|
||||
|
||||
// getHugePageSizeFromFilenames converts directory entry names such as
// "hugepages-1048576kB" into size strings such as "1GB".
//
// Names without the "hugepages-" prefix are silently ignored. Names with
// a suffix other than "kB", or whose size is not a number, are skipped;
// the first such problem is returned as the error while all valid
// entries are still returned.
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
	const (
		prefix = "hugepages-"
		suffix = "kB"
	)

	var firstErr error
	// remember keeps only the first problem encountered.
	remember := func(err error) {
		if firstErr == nil {
			firstErr = err
		}
	}

	sizes := make([]string, 0, len(fileNames))
	for _, name := range fileNames {
		// example: hugepages-1048576kB
		if !strings.HasPrefix(name, prefix) {
			// Unexpected file name: no prefix found, ignore it.
			continue
		}
		rest := name[len(prefix):]

		// The suffix is always "kB" (as of Linux 5.13). If we find
		// something else, produce an error but keep going.
		if !strings.HasSuffix(rest, suffix) {
			// Highly unlikely.
			remember(errors.New(name + `: invalid suffix (expected "kB")`))
			continue
		}
		digits := rest[:len(rest)-len(suffix)]

		kb, err := strconv.Atoi(digits)
		if err != nil {
			// Highly unlikely.
			remember(fmt.Errorf("%s: %w", name, err))
			continue
		}

		// Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
		// but in our case the size is in KB already.
		switch {
		case kb >= 1<<20:
			sizes = append(sizes, strconv.Itoa(kb>>20)+"GB")
		case kb >= 1<<10:
			sizes = append(sizes, strconv.Itoa(kb>>10)+"MB")
		default:
			sizes = append(sizes, digits+"KB")
		}
	}

	return sizes, firstErr
}
|
||||
|
||||
// GetPids returns all pids, that were added to cgroup at path.
|
||||
func GetPids(dir string) ([]int, error) {
|
||||
return readProcsFile(dir)
|
||||
}
|
||||
|
||||
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
|
||||
func WriteCgroupProc(dir string, pid int) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", CgroupProcesses)
|
||||
}
|
||||
|
||||
// Dont attach any pid to the cgroup if -1 is specified as a pid
|
||||
if pid == -1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
_, err = file.WriteString(strconv.Itoa(pid))
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EINVAL might mean that the task being added to cgroup.procs is in state
|
||||
// TASK_NEW. We should attempt to do so again.
|
||||
if errors.Is(err, unix.EINVAL) {
|
||||
time.Sleep(30 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// ConvertCPUSharesToCgroupV2Value converts a cgroup v1 CPU shares value into
// the corresponding cgroup v2 CPU weight.
//
// Since the OCI spec is designed for cgroup v1, in some cases there is a need
// to convert from the cgroup v1 configuration to cgroup v2. The conversion
// maps [2-262144] linearly onto [1-10000]:
//
//	y = 1 + ((x - 2) * 9999) / 262142
//
// 262144 comes from the Linux kernel definition "#define MAX_SHARES (1UL << 18)".
//
// A value of 0 means "unset" and is returned as 0. Values outside the
// [2-262144] range are clamped to the nearest bound before conversion, so
// that an input of 1 cannot wrap the unsigned subtraction and an oversized
// input cannot produce a weight above 10000.
func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
	const (
		minShares = 2
		maxShares = 262144
		minWeight = 1
		maxWeight = 10000
	)

	switch {
	case cpuShares == 0:
		// Unset.
		return 0
	case cpuShares <= minShares:
		return minWeight
	case cpuShares >= maxShares:
		return maxWeight
	}

	return minWeight + ((cpuShares-minShares)*(maxWeight-minWeight))/(maxShares-minShares)
}
|
||||
|
||||
// ConvertMemorySwapToCgroupV2Value converts the MemorySwap value from the OCI
// spec into the value used by cgroup v2 drivers.
//
// Resources.MemorySwap is defined as memory+swap combined, while cgroup v2
// keeps swap as a separate value, so memory is subtracted from memorySwap
// where that makes sense. The special values -1 ("max") and 0 ("unset") are
// handled before any arithmetic is done.
//
// An error is returned when swap is set but memory is unset (0), when memory
// is negative (other than -1), or when memorySwap is smaller than memory.
func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
	if memorySwap == 0 {
		if memory == -1 {
			// For compatibility with the cgroup1 controller: unlimited
			// memory with no explicit swap is treated as a request to set
			// both memory and swap to unlimited.
			return -1, nil
		}
		// Swap is unset, pass it through as is.
		return 0, nil
	}

	if memorySwap == -1 || memory == -1 {
		// Either swap is "max", or memory is unlimited: treat swap as is.
		return memorySwap, nil
	}

	if memory == 0 {
		// Unset or unknown memory, can't calculate swap.
		return 0, errors.New("unable to set swap limit without memory limit")
	}

	if memory < 0 {
		// Does not make sense to subtract a negative value.
		return 0, fmt.Errorf("invalid memory value: %d", memory)
	}

	if memorySwap < memory {
		// Sanity check.
		return 0, errors.New("memory+swap limit should be >= memory limit")
	}

	return memorySwap - memory, nil
}
|
||||
|
||||
// ConvertBlkIOToIOWeightValue converts a cgroup v1 BlkIOWeight into the
// corresponding cgroup v2 IOWeight.
//
// Since the OCI spec is designed for cgroup v1, in some cases there is a need
// to convert from the cgroup v1 configuration to cgroup v2. The conversion is
// linear from [10-1000] to [1-10000]:
//
//	y = 1 + (x - 10) * 9999 / 990
//
// A value of 0 is returned as 0.
//
// NOTE(review): the input is not range-checked here; for inputs 1..9 the
// unsigned subtraction wraps around. Confirm that callers validate the range.
func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
	weight := uint64(blkIoWeight)
	if weight != 0 {
		weight = 1 + (weight-10)*9999/990
	}
	return weight
}
|
277
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
Normal file
277
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
Normal file
@ -0,0 +1,277 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Code in this source file is specific to cgroup v1,
|
||||
// and must not be used from any cgroup v2 code.
|
||||
|
||||
const (
|
||||
// CgroupNamePrefix is the "name=" prefix of named cgroup v1 hierarchies.
// In this file it is stripped from subsystem names (tryDefaultPath,
// getCgroupMountsHelper) and tried as a fallback key prefix
// (getControllerPath).
CgroupNamePrefix = "name="
|
||||
// defaultPrefix is the directory under which tryDefaultPath looks for a
// per-subsystem cgroup v1 mount.
defaultPrefix = "/sys/fs/cgroup"
|
||||
)
|
||||
|
||||
var (
|
||||
// errUnified is returned by the cgroup v1 helpers in this file when
// IsCgroup2UnifiedMode reports true.
errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
|
||||
// ErrV1NoUnified is an exported sentinel error.
// NOTE(review): it is not used in the visible part of this file; presumably
// cgroup v1 managers return it when unified resources are configured --
// confirm against callers.
ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1")
|
||||
|
||||
// readMountinfoOnce, readMountinfoErr and cgroupMountinfo form the
// one-time cache filled and returned by readCgroupMountinfo.
readMountinfoOnce sync.Once
|
||||
readMountinfoErr error
|
||||
cgroupMountinfo []*mountinfo.Info
|
||||
)
|
||||
|
||||
// NotFoundError is the error reported when no mountpoint (or cgroup entry)
// could be found for a subsystem.
type NotFoundError struct {
	// Subsystem is the name of the subsystem that was looked up.
	Subsystem string
}

// Error implements the error interface.
func (nf *NotFoundError) Error() string {
	return fmt.Sprintf("mountpoint for %s not found", nf.Subsystem)
}

// NewNotFoundError returns a *NotFoundError for the subsystem sub.
func NewNotFoundError(sub string) error {
	return &NotFoundError{Subsystem: sub}
}

// IsNotFound reports whether err is, or wraps, a *NotFoundError.
func IsNotFound(err error) bool {
	return errors.As(err, new(*NotFoundError))
}
|
||||
|
||||
func tryDefaultPath(cgroupPath, subsystem string) string {
|
||||
if !strings.HasPrefix(defaultPrefix, cgroupPath) {
|
||||
return ""
|
||||
}
|
||||
|
||||
// remove possible prefix
|
||||
subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix)
|
||||
|
||||
// Make sure we're still under defaultPrefix, and resolve
|
||||
// a possible symlink (like cpu -> cpu,cpuacct).
|
||||
path, err := securejoin.SecureJoin(defaultPrefix, subsystem)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (1) path should be a directory.
|
||||
st, err := os.Lstat(path)
|
||||
if err != nil || !st.IsDir() {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (2) path should be a mount point.
|
||||
pst, err := os.Lstat(filepath.Dir(path))
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev {
|
||||
// parent dir has the same dev -- path is not a mount point
|
||||
return ""
|
||||
}
|
||||
|
||||
// (3) path should have 'cgroup' fs type.
|
||||
fst := unix.Statfs_t{}
|
||||
err = unix.Statfs(path, &fst)
|
||||
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
return path
|
||||
}
|
||||
|
||||
// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones
|
||||
// with fstype of "cgroup") for the current running process.
|
||||
//
|
||||
// The results are cached (to avoid re-reading mountinfo which is relatively
|
||||
// expensive), so it is assumed that cgroup mounts are not being changed.
|
||||
func readCgroupMountinfo() ([]*mountinfo.Info, error) {
|
||||
readMountinfoOnce.Do(func() {
|
||||
// mountinfo.GetMounts uses /proc/thread-self, so we can use it without
|
||||
// issues.
|
||||
cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts(
|
||||
mountinfo.FSTypeFilter("cgroup"),
|
||||
)
|
||||
})
|
||||
return cgroupMountinfo, readMountinfoErr
|
||||
}
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||
if len(subsystem) == 0 {
|
||||
return hybridMountpoint, nil
|
||||
}
|
||||
|
||||
// Avoid parsing mountinfo by trying the default path first, if possible.
|
||||
if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
|
||||
return path, nil
|
||||
}
|
||||
|
||||
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
|
||||
return mnt, err
|
||||
}
|
||||
|
||||
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", "", errUnified
|
||||
}
|
||||
|
||||
mi, err := readCgroupMountinfo()
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem)
|
||||
}
|
||||
|
||||
func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) {
|
||||
for _, mi := range mounts {
|
||||
if strings.HasPrefix(mi.Mountpoint, cgroupPath) {
|
||||
for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
||||
if opt == subsystem {
|
||||
return mi.Mountpoint, mi.Root, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", errors.New("no subsystem for mount")
|
||||
}
|
||||
|
||||
return getControllerPath(m.Subsystems[0], cgroups)
|
||||
}
|
||||
|
||||
func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) {
|
||||
res := make([]Mount, 0, len(ss))
|
||||
numFound := 0
|
||||
for _, mi := range mounts {
|
||||
m := Mount{
|
||||
Mountpoint: mi.Mountpoint,
|
||||
Root: mi.Root,
|
||||
}
|
||||
for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
||||
seen, known := ss[opt]
|
||||
if !known || (!all && seen) {
|
||||
continue
|
||||
}
|
||||
ss[opt] = true
|
||||
opt = strings.TrimPrefix(opt, CgroupNamePrefix)
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
numFound++
|
||||
}
|
||||
if len(m.Subsystems) > 0 || all {
|
||||
res = append(res, m)
|
||||
}
|
||||
if !all && numFound >= len(ss) {
|
||||
break
|
||||
}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func getCgroupMountsV1(all bool) ([]Mount, error) {
|
||||
mi, err := readCgroupMountinfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// We don't need to use /proc/thread-self here because runc always runs
|
||||
// with every thread in the same cgroup. This lets us avoid having to do
|
||||
// runtime.LockOSThread.
|
||||
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allMap := make(map[string]bool)
|
||||
for s := range allSubsystems {
|
||||
allMap[s] = false
|
||||
}
|
||||
|
||||
return getCgroupMountsHelper(allMap, mi, all)
|
||||
}
|
||||
|
||||
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||
func GetOwnCgroup(subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
// We don't need to use /proc/thread-self here because runc always runs
|
||||
// with every thread in the same cgroup. This lets us avoid having to do
|
||||
// runtime.LockOSThread.
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetOwnCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||
if len(subsystem) == 0 {
|
||||
return hybridMountpoint, nil
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see paths from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
if p, ok := cgroups[subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
return "", NewNotFoundError(subsystem)
|
||||
}
|
Reference in New Issue
Block a user