mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-06-03 04:16:42 +00:00
363 lines
10 KiB
Go
363 lines
10 KiB
Go
package systemd
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"math"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
|
dbus "github.com/godbus/dbus/v5"
|
|
"github.com/sirupsen/logrus"
|
|
|
|
"github.com/opencontainers/cgroups"
|
|
)
|
|
|
|
const (
	// defCPUQuotaPeriod is the default kernel value for the CPU quota
	// period: 100000 us (100 ms), the same for cgroup v1 and v2.
	// v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and
	// v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
	defCPUQuotaPeriod = uint64(100000)
)
|
|
|
|
var (
|
|
versionOnce sync.Once
|
|
version int
|
|
|
|
isRunningSystemdOnce sync.Once
|
|
isRunningSystemd bool
|
|
|
|
// GenerateDeviceProps is a function to generate systemd device
|
|
// properties, used by Set methods. Unless
|
|
// [github.com/opencontainers/cgroups/devices]
|
|
// package is imported, it is set to nil, so cgroup managers can't
|
|
// configure devices.
|
|
GenerateDeviceProps func(r *cgroups.Resources, sdVer int) ([]systemdDbus.Property, error)
|
|
)
|
|
|
|
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
|
// It was borrowed here to avoid a dependency on cgo.
|
|
//
|
|
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
|
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
|
// checks whether /run/systemd/system/ exists and is a directory.
|
|
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
|
func IsRunningSystemd() bool {
|
|
isRunningSystemdOnce.Do(func() {
|
|
fi, err := os.Lstat("/run/systemd/system")
|
|
isRunningSystemd = err == nil && fi.IsDir()
|
|
})
|
|
return isRunningSystemd
|
|
}
|
|
|
|
// ExpandSlice converts a systemd slice name into the path of that slice in
// the slice hierarchy.
//
// systemd represents slice hierarchy using `-`, so we need to follow suit when
// generating the path of slice. Essentially, test-a-b.slice becomes
// /test.slice/test-a.slice/test-a-b.slice. The root slice, "-.slice",
// expands to "/".
//
// An error is returned if the name does not end in ".slice", is just
// ".slice", contains a path separator, or has an empty dash-separated
// component (as in test--a.slice, -test.slice or test-.slice).
func ExpandSlice(slice string) (string, error) {
	const suffix = ".slice"
	// Name has to end with ".slice", but can't be just ".slice".
	if len(slice) <= len(suffix) || !strings.HasSuffix(slice, suffix) {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	// Path-separators are not allowed.
	if strings.Contains(slice, "/") {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	sliceName := strings.TrimSuffix(slice, suffix)
	// if input was -.slice, we should just return root now
	if sliceName == "-" {
		return "/", nil
	}

	var path strings.Builder
	// end is the offset in sliceName just past the current component, so
	// sliceName[:end] is the full name of the slice at this depth.
	end := 0
	for _, component := range strings.Split(sliceName, "-") {
		// test--a.slice isn't permitted, nor is -test.slice.
		if component == "" {
			return "", fmt.Errorf("invalid slice name: %s", slice)
		}

		end += len(component)
		path.WriteString("/")
		path.WriteString(sliceName[:end])
		path.WriteString(suffix)
		end++ // skip the "-" separating this component from the next
	}
	return path.String(), nil
}
|
|
|
|
func newProp(name string, units interface{}) systemdDbus.Property {
|
|
return systemdDbus.Property{
|
|
Name: name,
|
|
Value: dbus.MakeVariant(units),
|
|
}
|
|
}
|
|
|
|
func getUnitName(c *cgroups.Cgroup) string {
|
|
// by default, we create a scope unless the user explicitly asks for a slice.
|
|
if !strings.HasSuffix(c.Name, ".slice") {
|
|
return c.ScopePrefix + "-" + c.Name + ".scope"
|
|
}
|
|
return c.Name
|
|
}
|
|
|
|
// getUnitType returns the systemd unit type for unitName: "Slice" if the
// name ends with ".slice", and "Scope" for anything else.
//
// This code should be in sync with getUnitName.
func getUnitType(unitName string) string {
	if strings.HasSuffix(unitName, ".slice") {
		return "Slice"
	}
	return "Scope"
}
|
|
|
|
// isDbusError returns true if the error is a specific dbus error.
|
|
func isDbusError(err error, name string) bool {
|
|
if err != nil {
|
|
var derr dbus.Error
|
|
if errors.As(err, &derr) {
|
|
return strings.Contains(derr.Name, name)
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// isUnitExists returns true if the error is that a systemd unit already exists.
|
|
func isUnitExists(err error) bool {
|
|
return isDbusError(err, "org.freedesktop.systemd1.UnitExists")
|
|
}
|
|
|
|
func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error {
|
|
statusChan := make(chan string, 1)
|
|
retry := true
|
|
|
|
retry:
|
|
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
|
_, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
|
|
return err
|
|
})
|
|
if err != nil {
|
|
if !isUnitExists(err) {
|
|
return err
|
|
}
|
|
if ignoreExist {
|
|
// TODO: remove this hack.
|
|
// This is kubelet making sure a slice exists (see
|
|
// https://github.com/opencontainers/runc/pull/1124).
|
|
return nil
|
|
}
|
|
if retry {
|
|
// In case a unit with the same name exists, this may
|
|
// be a leftover failed unit. Reset it, so systemd can
|
|
// remove it, and retry once.
|
|
err = resetFailedUnit(cm, unitName)
|
|
if err != nil {
|
|
logrus.Warnf("unable to reset failed unit: %v", err)
|
|
}
|
|
retry = false
|
|
goto retry
|
|
}
|
|
return err
|
|
}
|
|
|
|
timeout := time.NewTimer(30 * time.Second)
|
|
defer timeout.Stop()
|
|
|
|
select {
|
|
case s := <-statusChan:
|
|
close(statusChan)
|
|
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
|
if s != "done" {
|
|
_ = resetFailedUnit(cm, unitName)
|
|
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
|
}
|
|
case <-timeout.C:
|
|
_ = resetFailedUnit(cm, unitName)
|
|
return errors.New("Timeout waiting for systemd to create " + unitName)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func stopUnit(cm *dbusConnManager, unitName string) error {
|
|
statusChan := make(chan string, 1)
|
|
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
|
_, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan)
|
|
return err
|
|
})
|
|
if err == nil {
|
|
timeout := time.NewTimer(30 * time.Second)
|
|
defer timeout.Stop()
|
|
|
|
select {
|
|
case s := <-statusChan:
|
|
close(statusChan)
|
|
// Please refer to https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
|
if s != "done" {
|
|
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
|
|
}
|
|
case <-timeout.C:
|
|
return errors.New("Timed out while waiting for systemd to remove " + unitName)
|
|
}
|
|
}
|
|
|
|
// In case of a failed unit, let systemd remove it.
|
|
_ = resetFailedUnit(cm, unitName)
|
|
|
|
return nil
|
|
}
|
|
|
|
func resetFailedUnit(cm *dbusConnManager, name string) error {
|
|
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
|
return c.ResetFailedUnitContext(context.TODO(), name)
|
|
})
|
|
}
|
|
|
|
func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) {
|
|
var prop *systemdDbus.Property
|
|
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) {
|
|
prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName)
|
|
return Err
|
|
})
|
|
return prop, err
|
|
}
|
|
|
|
func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
|
|
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
|
return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
|
|
})
|
|
}
|
|
|
|
func getManagerProperty(cm *dbusConnManager, name string) (string, error) {
|
|
str := ""
|
|
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
|
var err error
|
|
str, err = c.GetManagerProperty(name)
|
|
return err
|
|
})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return strconv.Unquote(str)
|
|
}
|
|
|
|
func systemdVersion(cm *dbusConnManager) int {
|
|
versionOnce.Do(func() {
|
|
version = -1
|
|
verStr, err := getManagerProperty(cm, "Version")
|
|
if err == nil {
|
|
version, err = systemdVersionAtoi(verStr)
|
|
}
|
|
|
|
if err != nil {
|
|
logrus.WithError(err).Error("unable to get systemd version")
|
|
}
|
|
})
|
|
|
|
return version
|
|
}
|
|
|
|
// systemdVersionAtoi extracts a numeric systemd version from the argument.
// The argument should be of the form: "v245.4-1.fc32", "245", "v245-1.fc32",
// "245-1.fc32" (with or without quotes). The result for all of the above
// should be 245.
//
// If no digits follow the optional leading quote/"v" characters, or the
// number does not fit in an int, it returns -1 and an error wrapping the
// strconv.Atoi failure.
func systemdVersionAtoi(str string) (int, error) {
	// Unconditionally remove the leading prefix ("v).
	str = strings.TrimLeft(str, `"v`)
	// Match on the first integer we can grab: find where the leading run
	// of ASCII digits ends and cut the tail.
	end := 0
	for end < len(str) && str[end] >= '0' && str[end] <= '9' {
		end++
	}
	ver, err := strconv.Atoi(str[:end])
	if err != nil {
		return -1, fmt.Errorf("can't parse version: %w", err)
	}
	return ver, nil
}
|
|
|
|
func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
|
if period != 0 {
|
|
// systemd only supports CPUQuotaPeriodUSec since v242
|
|
sdVer := systemdVersion(cm)
|
|
if sdVer >= 242 {
|
|
*properties = append(*properties,
|
|
newProp("CPUQuotaPeriodUSec", period))
|
|
} else {
|
|
logrus.Debugf("systemd v%d is too old to support CPUQuotaPeriodSec "+
|
|
" (setting will still be applied to cgroupfs)", sdVer)
|
|
}
|
|
}
|
|
if quota != 0 || period != 0 {
|
|
// corresponds to USEC_INFINITY in systemd
|
|
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
|
if quota > 0 {
|
|
if period == 0 {
|
|
// assume the default
|
|
period = defCPUQuotaPeriod
|
|
}
|
|
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
|
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
|
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
|
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
|
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
|
if cpuQuotaPerSecUSec%10000 != 0 {
|
|
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
|
}
|
|
}
|
|
*properties = append(*properties,
|
|
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
|
}
|
|
}
|
|
|
|
func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error {
|
|
if cpus == "" && mems == "" {
|
|
return nil
|
|
}
|
|
|
|
// systemd only supports AllowedCPUs/AllowedMemoryNodes since v244
|
|
sdVer := systemdVersion(cm)
|
|
if sdVer < 244 {
|
|
logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+
|
|
" (settings will still be applied to cgroupfs)", sdVer)
|
|
return nil
|
|
}
|
|
|
|
if cpus != "" {
|
|
bits, err := RangeToBits(cpus)
|
|
if err != nil {
|
|
return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
|
|
cpus, err)
|
|
}
|
|
*props = append(*props,
|
|
newProp("AllowedCPUs", bits))
|
|
}
|
|
if mems != "" {
|
|
bits, err := RangeToBits(mems)
|
|
if err != nil {
|
|
return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
|
|
mems, err)
|
|
}
|
|
*props = append(*props,
|
|
newProp("AllowedMemoryNodes", bits))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// generateDeviceProperties takes the configured device rules and generates a
|
|
// corresponding set of systemd properties to configure the devices correctly.
|
|
func generateDeviceProperties(r *cgroups.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
|
if GenerateDeviceProps == nil {
|
|
if len(r.Devices) > 0 {
|
|
return nil, cgroups.ErrDevicesUnsupported
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
return GenerateDeviceProps(r, systemdVersion(cm))
|
|
}
|