build: move e2e dependencies into e2e/go.mod

Several packages are only used while running the e2e suite. These
packages are less important to update, as they cannot influence the
final executable that is part of the Ceph-CSI container-image.

By moving these dependencies out of the main Ceph-CSI go.mod, it is
easier to identify if a reported CVE affects Ceph-CSI, or only the
testing (like most of the Kubernetes CVEs).

Signed-off-by: Niels de Vos <ndevos@ibm.com>
This commit is contained in:
Niels de Vos
2025-03-04 08:57:28 +01:00
committed by mergify[bot]
parent 15da101b1b
commit bec6090996
8047 changed files with 1407827 additions and 3453 deletions

View File

@ -0,0 +1,506 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"bytes"
"fmt"
"os"
"path"
"path/filepath"
"runtime"
"strings"
"time"
"k8s.io/klog/v2"
"k8s.io/apimachinery/pkg/util/sets"
)
// Upper bounds enforced by validatePath on payload paths.
const (
// maxFileNameLength is the largest len() a single path element may have.
maxFileNameLength = 255
// maxPathLength is the largest len() a complete payload path may have.
maxPathLength = 4096
)
// AtomicWriter handles atomically projecting content for a set of files into
// a target directory.
//
// Note:
//
// 1. AtomicWriter reserves the set of pathnames starting with `..`.
// 2. AtomicWriter offers no concurrency guarantees and must be synchronized
// by the caller.
//
// The visible files in this volume are symlinks to files in the writer's data
// directory. Actual files are stored in a hidden timestamped directory which
// is symlinked to by the data directory. The timestamped directory and
// data directory symlink are created in the writer's target dir. This scheme
// allows the files to be atomically updated by changing the target of the
// data directory symlink.
//
// Consumers of the target directory can monitor the ..data symlink using
// inotify or fanotify to receive events when the content in the volume is
// updated.
type AtomicWriter struct {
// targetDir is the directory in which the timestamped directories, the
// data directory symlink and the user-visible symlinks are created.
targetDir string
// logContext is the prefix put in front of the log messages emitted by
// the writer.
logContext string
}
// FileProjection contains file Data and access Mode
type FileProjection struct {
// Data is the content written to the projected file.
Data []byte
// Mode is converted to an os.FileMode and applied to the projected file.
Mode int32
// FsUser, when non-nil, is the numeric uid the projected file is chowned
// to; when nil the owner of the file is left untouched.
FsUser *int64
}
// NewAtomicWriter returns an AtomicWriter that projects into targetDir and
// prefixes its log messages with logContext.
//
// If targetDir does not exist, the os.Stat error is returned and no writer is
// created. Any other os.Stat failure is not treated as an error here.
func NewAtomicWriter(targetDir string, logContext string) (*AtomicWriter, error) {
	if _, statErr := os.Stat(targetDir); os.IsNotExist(statErr) {
		return nil, statErr
	}

	writer := &AtomicWriter{
		targetDir:  targetDir,
		logContext: logContext,
	}
	return writer, nil
}
// Names of the symlinks that AtomicWriter.Write manages inside the target
// directory.
const (
// dataDirName is the symlink that points at the current timestamped
// directory.
dataDirName = "..data"
// newDataDirName is the temporary symlink that is created first and then
// moved over dataDirName.
newDataDirName = "..data_tmp"
)
// Write does an atomic projection of the given payload into the writer's target
// directory. Input paths must not begin with '..'.
//
// setPerms is an optional function that the caller can provide to set the
// permissions of the newly created files before they are published. It is
// passed subPath, the name of the timestamped directory that was created under
// the target directory. A nil setPerms skips that step.
//
// The first error encountered is logged and returned; failures while cleaning
// up after such an error are only logged.
//
// The Write algorithm is:
//
//  1. The payload is validated; if the payload is invalid, the function
//     returns an error.
//
//  2. The current timestamped directory is detected by reading the data
//     directory symlink.
//
//  3. The old version of the volume is walked to determine whether any
//     portion of the payload was deleted and is still present on disk.
//
//  4. The data in the current timestamped directory is compared to the
//     projected data to determine if an update to data directory is required.
//
//  5. A new timestamped dir is created if an update is required.
//
//  6. The payload is written to the new timestamped directory.
//
//  7. Permissions are set (if setPerms is not nil) on the new timestamped
//     directory and files.
//
//  8. A symlink to the new timestamped directory ..data_tmp is created that
//     will become the new data directory.
//
//  9. The new data directory symlink is renamed to the data directory; rename
//     is atomic. (On Windows the old link is removed and recreated instead.)
//
//  10. Symlinks and directory for new user-visible files are created (if
//     needed).
//
//     For example, consider the files:
//     <target-dir>/podName
//     <target-dir>/user/labels
//     <target-dir>/k8s/annotations
//
//     The user visible files are symbolic links into the internal data
//     directory:
//     <target-dir>/podName -> ..data/podName
//     <target-dir>/user -> ..data/user
//     <target-dir>/k8s -> ..data/k8s
//
//     The data directory itself is a link to a timestamped directory with
//     the real data:
//     <target-dir>/..data -> ..2016_02_01_15_04_05.12345678/
//
//     NOTE(claudiub): We need to create these symlinks AFTER we've finished
//     creating and linking everything else. On Windows, if a target does not
//     exist, the created symlink will not work properly if the target ends up
//     being a directory.
//
//  11. Old paths are removed from the user-visible portion of the target
//     directory.
//
//  12. The previous timestamped directory is removed, if it exists.
func (w *AtomicWriter) Write(payload map[string]FileProjection, setPerms func(subPath string) error) error {
	// (1)
	cleanPayload, err := validatePayload(payload)
	if err != nil {
		klog.Errorf("%s: invalid payload: %v", w.logContext, err)
		return err
	}

	// (2)
	dataDirPath := filepath.Join(w.targetDir, dataDirName)
	oldTsDir, err := os.Readlink(dataDirPath)
	if err != nil {
		if !os.IsNotExist(err) {
			klog.Errorf("%s: error reading link for data directory: %v", w.logContext, err)
			return err
		}
		// although Readlink() returns "" on err, don't be fragile by relying on it (since it's not specified in docs)
		// empty oldTsDir indicates that it didn't exist
		oldTsDir = ""
	}
	oldTsPath := filepath.Join(w.targetDir, oldTsDir)

	var pathsToRemove sets.Set[string]
	shouldWrite := true
	// if there was no old version, there's nothing to remove
	if len(oldTsDir) != 0 {
		// (3)
		pathsToRemove, err = w.pathsToRemove(cleanPayload, oldTsPath)
		if err != nil {
			klog.Errorf("%s: error determining user-visible files to remove: %v", w.logContext, err)
			return err
		}

		// (4)
		if should, err := shouldWritePayload(cleanPayload, oldTsPath); err != nil {
			klog.Errorf("%s: error determining whether payload should be written to disk: %v", w.logContext, err)
			return err
		} else if !should && len(pathsToRemove) == 0 {
			klog.V(4).Infof("%s: write not required for data directory %v", w.logContext, oldTsDir)
			// data directory is already up to date, but we need to make sure that
			// the user-visible symlinks are created.
			// See https://github.com/kubernetes/kubernetes/issues/121472 for more details.
			// Reset oldTsDir to empty string to avoid removing the data directory.
			shouldWrite = false
			oldTsDir = ""
		} else {
			klog.V(4).Infof("%s: write required for target directory %v", w.logContext, w.targetDir)
		}
	}

	if shouldWrite {
		// (5)
		tsDir, err := w.newTimestampDir()
		if err != nil {
			klog.V(4).Infof("%s: error creating new ts data directory: %v", w.logContext, err)
			return err
		}
		tsDirName := filepath.Base(tsDir)

		// (6)
		if err = w.writePayloadToDir(cleanPayload, tsDir); err != nil {
			klog.Errorf("%s: error writing payload to ts data directory %s: %v", w.logContext, tsDir, err)
			return err
		}
		klog.V(4).Infof("%s: performed write of new data to ts data directory: %s", w.logContext, tsDir)

		// (7)
		if setPerms != nil {
			if err := setPerms(tsDirName); err != nil {
				klog.Errorf("%s: error applying ownership settings: %v", w.logContext, err)
				return err
			}
		}

		// (8)
		newDataDirPath := filepath.Join(w.targetDir, newDataDirName)
		if err = os.Symlink(tsDirName, newDataDirPath); err != nil {
			if err := os.RemoveAll(tsDir); err != nil {
				klog.Errorf("%s: error removing new ts directory %s: %v", w.logContext, tsDir, err)
			}
			klog.Errorf("%s: error creating symbolic link for atomic update: %v", w.logContext, err)
			return err
		}

		// (9)
		if runtime.GOOS == "windows" {
			if err := os.Remove(dataDirPath); err != nil {
				klog.Errorf("%s: error removing data dir directory %s: %v", w.logContext, dataDirPath, err)
			}
			err = os.Symlink(tsDirName, dataDirPath)
			if err := os.Remove(newDataDirPath); err != nil {
				klog.Errorf("%s: error removing new data dir directory %s: %v", w.logContext, newDataDirPath, err)
			}
		} else {
			err = os.Rename(newDataDirPath, dataDirPath)
		}
		if err != nil {
			// os.Remove reports failures as *PathError, so a direct comparison
			// with os.ErrNotExist never matches; os.IsNotExist unwraps it. The
			// temporary link may already be gone (the Windows branch above
			// removes it), which is not worth reporting.
			if err := os.Remove(newDataDirPath); err != nil && !os.IsNotExist(err) {
				klog.Errorf("%s: error removing new data dir directory %s: %v", w.logContext, newDataDirPath, err)
			}
			if err := os.RemoveAll(tsDir); err != nil {
				klog.Errorf("%s: error removing new ts directory %s: %v", w.logContext, tsDir, err)
			}
			klog.Errorf("%s: error renaming symbolic link for data directory %s: %v", w.logContext, newDataDirPath, err)
			return err
		}
	}

	// (10)
	if err = w.createUserVisibleFiles(cleanPayload); err != nil {
		klog.Errorf("%s: error creating visible symlinks in %s: %v", w.logContext, w.targetDir, err)
		return err
	}

	// (11)
	if err = w.removeUserVisiblePaths(pathsToRemove); err != nil {
		klog.Errorf("%s: error removing old visible symlinks: %v", w.logContext, err)
		return err
	}

	// (12)
	if len(oldTsDir) > 0 {
		if err = os.RemoveAll(oldTsPath); err != nil {
			klog.Errorf("%s: error removing old data directory %s: %v", w.logContext, oldTsDir, err)
			return err
		}
	}

	return nil
}
// validatePayload checks every path in the payload with validatePath and
// returns the first validation error it hits. On success it returns a new map
// that holds the same projections keyed by the filepath.Clean'ed paths; the
// input map is not modified.
func validatePayload(payload map[string]FileProjection) (map[string]FileProjection, error) {
	cleaned := make(map[string]FileProjection)
	for rawPath, projection := range payload {
		err := validatePath(rawPath)
		if err != nil {
			return nil, err
		}
		cleaned[filepath.Clean(rawPath)] = projection
	}
	return cleaned, nil
}
// validatePath reports whether targetPath is acceptable as a payload path and
// returns a descriptive error when it is not. A path is rejected when it:
//
//  1. is empty
//  2. is absolute
//  3. is longer than maxPathLength
//  4. contains '..' as an element
//  5. contains an element longer than maxFileNameLength
//  6. starts with '..'
func validatePath(targetPath string) error {
	// TODO: somehow unify this with the similar api validation,
	// validateVolumeSourcePath; the error semantics are just different enough
	// from this that it was time-prohibitive trying to find the right
	// refactoring to re-use.
	switch {
	case targetPath == "":
		return fmt.Errorf("invalid path: must not be empty: %q", targetPath)
	case path.IsAbs(targetPath):
		return fmt.Errorf("invalid path: must be relative path: %s", targetPath)
	case len(targetPath) > maxPathLength:
		return fmt.Errorf("invalid path: must be less than or equal to %d characters", maxPathLength)
	}

	elements := strings.Split(targetPath, string(os.PathSeparator))
	for i := range elements {
		if elements[i] == ".." {
			return fmt.Errorf("invalid path: must not contain '..': %s", targetPath)
		}
		if len(elements[i]) > maxFileNameLength {
			return fmt.Errorf("invalid path: filenames must be less than or equal to %d characters", maxFileNameLength)
		}
	}

	// A bare ".." element was rejected above; this catches first elements
	// that merely begin with "..", which are reserved for the writer itself.
	if first := elements[0]; len(first) > 2 && strings.HasPrefix(first, "..") {
		return fmt.Errorf("invalid path: must not start with '..': %s", targetPath)
	}
	return nil
}
// shouldWritePayload reports whether at least one file of the payload differs
// from (or is missing in) the directory oldTsDir. It stops at the first file
// that needs writing, or at the first error returned by shouldWriteFile.
func shouldWritePayload(payload map[string]FileProjection, oldTsDir string) (bool, error) {
	for relPath, projection := range payload {
		onDisk := filepath.Join(oldTsDir, relPath)
		changed, err := shouldWriteFile(onDisk, projection.Data)
		switch {
		case err != nil:
			return false, err
		case changed:
			return true, nil
		}
	}
	return false, nil
}
// shouldWriteFile reports whether the file at path has to be (re)written to
// hold content: true when the file does not exist or when its current bytes
// differ from content. An error from reading an existing file is returned
// together with false.
func shouldWriteFile(path string, content []byte) (bool, error) {
	if _, statErr := os.Lstat(path); os.IsNotExist(statErr) {
		return true, nil
	}

	existing, readErr := os.ReadFile(path)
	if readErr != nil {
		return false, readErr
	}
	if bytes.Equal(content, existing) {
		return false, nil
	}
	return true, nil
}
// pathsToRemove walks oldTSDir, the current version of the data directory, and
// returns the set of paths (relative to oldTSDir) found there that are neither
// a payload file nor a parent directory of one. Those are the paths that
// should be removed after the payload is written to the target directory.
func (w *AtomicWriter) pathsToRemove(payload map[string]FileProjection, oldTSDir string) (sets.Set[string], error) {
	sep := string(os.PathSeparator)

	existing := sets.New[string]()
	collect := func(walked string, _ os.FileInfo, _ error) error {
		rel := strings.TrimPrefix(strings.TrimPrefix(walked, oldTSDir), sep)
		if rel != "" {
			existing.Insert(rel)
		}
		return nil
	}

	walkErr := filepath.Walk(oldTSDir, collect)
	switch {
	case os.IsNotExist(walkErr):
		return nil, nil
	case walkErr != nil:
		return nil, walkErr
	}
	klog.V(5).Infof("%s: current paths: %+v", w.targetDir, sets.List(existing))

	wanted := sets.New[string]()
	for file := range payload {
		// add all subpaths for the payload to the set of new paths
		// to avoid attempting to remove non-empty dirs
		ancestor := file
		for ancestor != "" {
			wanted.Insert(ancestor)
			dir, _ := filepath.Split(ancestor)
			ancestor = strings.TrimSuffix(dir, sep)
		}
	}
	klog.V(5).Infof("%s: new paths: %+v", w.targetDir, sets.List(wanted))

	result := existing.Difference(wanted)
	klog.V(5).Infof("%s: paths to remove: %+v", w.targetDir, result)
	return result, nil
}
// newTimestampDir creates a fresh directory inside the target directory whose
// name starts with ".." followed by the current UTC timestamp, and returns its
// path. Failures are logged and returned with an empty path.
func (w *AtomicWriter) newTimestampDir() (string, error) {
	prefix := time.Now().UTC().Format("..2006_01_02_15_04_05.")
	tsDir, err := os.MkdirTemp(w.targetDir, prefix)
	if err != nil {
		klog.Errorf("%s: unable to create new temp directory: %v", w.logContext, err)
		return "", err
	}

	// 0755 permissions are needed to allow 'group' and 'other' to recurse the
	// directory tree. do a chmod here to ensure that permissions are set correctly
	// regardless of the process' umask.
	if err := os.Chmod(tsDir, 0755); err != nil {
		klog.Errorf("%s: unable to set mode on new temp directory: %v", w.logContext, err)
		return "", err
	}
	return tsDir, nil
}
// writePayloadToDir materializes every file of the payload below dir, which
// must already exist. Parent directories are created as needed, each file gets
// its projected mode and, when FsUser is set, is chowned to that uid. The first
// failure is logged and returned.
func (w *AtomicWriter) writePayloadToDir(payload map[string]FileProjection, dir string) error {
	for relPath, projection := range payload {
		mode := os.FileMode(projection.Mode)
		fullPath := filepath.Join(dir, relPath)

		baseDir, _ := filepath.Split(fullPath)
		if err := os.MkdirAll(baseDir, os.ModePerm); err != nil {
			klog.Errorf("%s: unable to create directory %s: %v", w.logContext, baseDir, err)
			return err
		}

		if err := os.WriteFile(fullPath, projection.Data, mode); err != nil {
			klog.Errorf("%s: unable to write file %s with mode %v: %v", w.logContext, fullPath, mode, err)
			return err
		}

		// Chmod is needed because os.WriteFile() ends up calling
		// open(2) to create the file, so the final mode used is "mode &
		// ~umask". But we want to make sure the specified mode is used
		// in the file no matter what the umask is.
		if err := os.Chmod(fullPath, mode); err != nil {
			klog.Errorf("%s: unable to change file %s with mode %v: %v", w.logContext, fullPath, mode, err)
			return err
		}

		// Ownership is only touched when the projection asks for it; the
		// group is passed as -1.
		if owner := projection.FsUser; owner != nil {
			if err := w.chown(fullPath, int(*owner), -1); err != nil {
				klog.Errorf("%s: unable to change file %s with owner %v: %v", w.logContext, fullPath, int(*owner), err)
				return err
			}
		}
	}
	return nil
}
// createUserVisibleFiles makes sure that, for every payload path, a relative
// symlink named after the first element of that path exists in the target
// directory and points into the data directory. Links that can already be
// read are left alone.
//
// Viz:
// For files: "bar", "foo/bar", "baz/bar", "foo/baz/blah"
// the following symlinks are created:
// bar -> ..data/bar
// foo -> ..data/foo
// baz -> ..data/baz
func (w *AtomicWriter) createUserVisibleFiles(payload map[string]FileProjection) error {
	sep := string(os.PathSeparator)
	for relPath := range payload {
		// Only the top-level element becomes user-visible.
		topLevel, _, _ := strings.Cut(relPath, sep)
		visibleFile := filepath.Join(w.targetDir, topLevel)

		if _, err := os.Readlink(visibleFile); !os.IsNotExist(err) {
			continue
		}

		// The link into the data directory for this path doesn't exist; create it
		if err := os.Symlink(filepath.Join(dataDirName, topLevel), visibleFile); err != nil {
			return err
		}
	}
	return nil
}
// removeUserVisiblePaths deletes the given paths from the user-visible part of
// the writer's target directory. Paths containing a separator are skipped, so
// only entries directly inside the target directory are removed. Every failure
// is logged; the error of the last failed removal is returned.
func (w *AtomicWriter) removeUserVisiblePaths(paths sets.Set[string]) error {
	var lastErr error
	sep := string(os.PathSeparator)
	for entry := range paths {
		// only remove symlinks from the volume root directory (i.e. items that don't contain '/')
		if strings.Contains(entry, sep) {
			continue
		}
		err := os.Remove(filepath.Join(w.targetDir, entry))
		if err == nil {
			continue
		}
		klog.Errorf("%s: error pruning old user-visible path %s: %v", w.logContext, entry, err)
		lastErr = err
	}
	return lastErr
}

View File

@ -0,0 +1,27 @@
//go:build linux
// +build linux
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "os"
// chown changes the numeric uid and gid of the named file.
// On Linux it passes its arguments straight to os.Chown and returns that
// call's error.
func (w *AtomicWriter) chown(name string, uid, gid int) error {
return os.Chown(name, uid, gid)
}

View File

@ -0,0 +1,33 @@
//go:build !linux
// +build !linux
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"runtime"
"k8s.io/klog/v2"
)
// chown is the non-Linux counterpart of the ownership change.
// It is a no-op on unsupported platforms: it only logs a warning naming the
// requested uid, the file and the current GOOS, and always returns nil. The
// gid argument is ignored.
func (w *AtomicWriter) chown(name string, uid, _ /* gid */ int) error {
klog.Warningf("%s: skipping change of Linux owner %v for file %s; unsupported on %s", w.logContext, uid, name, runtime.GOOS)
return nil
}

View File

@ -0,0 +1,70 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"crypto/sha1"
"encoding/hex"
)
// This file is a common placeholder for the volume limit utility constants
// that are shared between the volume package and the scheduler.
const (
	// EBSVolumeLimitKey is the resource name under which volume limits for
	// EBS are stored.
	EBSVolumeLimitKey = "attachable-volumes-aws-ebs"
	// EBSNitroLimitRegex matches nitro instance types, which have a limit
	// different from the EBS default.
	EBSNitroLimitRegex = "^[cmr]5.*|t3|z1d"
	// DefaultMaxEBSVolumes is the limit for volumes attached to an instance.
	// Amazon recommends no more than 40; the system root volume uses at least one.
	// See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/volume_limits.html#linux-specific-volume-limits
	DefaultMaxEBSVolumes = 39
	// DefaultMaxEBSNitroVolumeLimit is the default EBS volume limit on m5
	// and c5 instances.
	DefaultMaxEBSNitroVolumeLimit = 25
	// AzureVolumeLimitKey is the resource name under which volume limits for
	// Azure are stored.
	AzureVolumeLimitKey = "attachable-volumes-azure-disk"
	// GCEVolumeLimitKey is the resource name under which volume limits for a
	// GCE node are stored.
	GCEVolumeLimitKey = "attachable-volumes-gce-pd"

	// CinderVolumeLimitKey is the volume limit key for Cinder.
	CinderVolumeLimitKey = "attachable-volumes-cinder"
	// DefaultMaxCinderVolumes defines the maximum number of PD Volumes for Cinder
	// For Openstack we are keeping this to a high enough value so as depending on backend
	// cluster admins can configure it.
	DefaultMaxCinderVolumes = 256

	// CSIAttachLimitPrefix is the prefix of the limit keys used for CSI
	// volumes.
	CSIAttachLimitPrefix = "attachable-volumes-csi-"

	// ResourceNameLengthLimit is the maximum allowed length of a
	// ResourceName.
	ResourceNameLengthLimit = 63
)

// GetCSIAttachLimitKey returns the limit key used for CSI volumes of the given
// driver.
//
// While prefix plus driver name stay below ResourceNameLengthLimit the key is
// simply CSIAttachLimitPrefix + driverName. Otherwise the key is built from
// the prefix, the first 23 bytes of the driver name and the first 16 hex
// digits of the SHA-1 of the full driver name.
func GetCSIAttachLimitKey(driverName string) string {
	if len(CSIAttachLimitPrefix)+len(driverName) < ResourceNameLengthLimit {
		return CSIAttachLimitPrefix + driverName
	}

	digest := sha1.Sum([]byte(driverName))
	suffix := hex.EncodeToString(digest[:])[:16]
	return CSIAttachLimitPrefix + driverName[:23] + suffix
}

View File

@ -0,0 +1,34 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
// DeviceUtil is a util for common device methods
type DeviceUtil interface {
// FindMultipathDeviceForDevice returns the device-mapper device (such as
// /dev/dm-1) that the given disk belongs to, or "" when none is found.
FindMultipathDeviceForDevice(disk string) string
// FindSlaveDevicesOnMultipath returns the devices that are managed by the
// given device-mapper device.
FindSlaveDevicesOnMultipath(disk string) []string
// GetISCSIPortalHostMapForTarget returns a map from iSCSI portal to SCSI
// host number for the hosts logged into the given target.
GetISCSIPortalHostMapForTarget(targetIqn string) (map[string]int, error)
// FindDevicesForISCSILun returns the devices that correspond to the given
// LUN of the given target.
FindDevicesForISCSILun(targetIqn string, lun int) ([]string, error)
}
// deviceHandler is the type returned by NewDeviceHandler as a DeviceUtil.
type deviceHandler struct {
// getIo is the IoUtil handed to NewDeviceHandler.
getIo IoUtil
}
// NewDeviceHandler creates a new DeviceUtil implementation that stores the
// given IoUtil.
func NewDeviceHandler(io IoUtil) DeviceUtil {
return &deviceHandler{getIo: io}
}

View File

@ -0,0 +1,306 @@
//go:build linux
// +build linux
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"errors"
"fmt"
"net"
"os"
"path/filepath"
"strconv"
"strings"
"k8s.io/klog/v2"
)
// FindMultipathDeviceForDevice takes a device name like /dev/sdx and returns
// the devicemapper parent (/dev/dm-N) that lists it as a slave in sysfs. A
// device that is already a /dev/dm-* path is returned unchanged. An empty
// string is returned when the device cannot be resolved or no parent is found.
func (handler *deviceHandler) FindMultipathDeviceForDevice(device string) string {
	if strings.HasPrefix(device, "/dev/dm-") {
		return device
	}

	fs := handler.getIo
	diskName, err := findDeviceForPath(device, fs)
	if err != nil {
		return ""
	}

	blockRoot := "/sys/block/"
	entries, err := fs.ReadDir(blockRoot)
	if err != nil {
		return ""
	}
	for _, entry := range entries {
		dmName := entry.Name()
		if !strings.HasPrefix(dmName, "dm-") {
			continue
		}
		// The disk belongs to this dm device when it shows up among its slaves.
		if _, statErr := fs.Lstat(blockRoot + dmName + "/slaves/" + diskName); statErr == nil {
			return "/dev/" + dmName
		}
	}
	return ""
}
// findDeviceForPath resolves a linked path such as /dev/disk/by-path/XXXX or
// /dev/mapper/XXXX to the name of the underlying disk (sdX, hdX, ...). Passing
// /dev/sdX directly yields sdX. An error is returned when the symlinks cannot
// be evaluated or the resolved path does not have the shape /dev*/<name>.
func findDeviceForPath(path string, io IoUtil) (string, error) {
	resolved, err := io.EvalSymlinks(path)
	if err != nil {
		return "", err
	}

	// /dev/hdX splits into "", "dev", "hdX"; only the last part is returned.
	segments := strings.Split(resolved, "/")
	if len(segments) != 3 || !strings.HasPrefix(segments[1], "dev") {
		return "", errors.New("Illegal path for device " + resolved)
	}
	return segments[2], nil
}
// FindSlaveDevicesOnMultipath takes a dm name like /dev/dm-1 and returns the
// /dev paths of all devices that are managed by that devicemapper device. A
// nil slice is returned when dm does not look like /dev*/<name> or its slaves
// directory cannot be read.
func (handler *deviceHandler) FindSlaveDevicesOnMultipath(dm string) []string {
	var slaves []string

	// /dev/dm-1 splits into "", "dev", "dm-1"
	segments := strings.Split(dm, "/")
	if len(segments) != 3 || !strings.HasPrefix(segments[1], "dev") {
		return slaves
	}

	slavesDir := filepath.Join("/sys/block/", segments[2], "/slaves/")
	entries, err := handler.getIo.ReadDir(slavesDir)
	if err != nil {
		return slaves
	}
	for _, entry := range entries {
		slaves = append(slaves, filepath.Join("/dev/", entry.Name()))
	}
	return slaves
}
// GetISCSIPortalHostMapForTarget walks the iSCSI hosts in sysfs and collects
// the ones that are logged into the target with the given iqn. The result maps
// iSCSI portals (string) to SCSI host numbers (integers); both the current and
// the persistent portal of every connection are recorded.
//
// For example: {
// "192.168.30.7:3260": 2,
// "192.168.30.8:3260": 3,
// }
//
// A missing /sys/class/iscsi_host yields an empty map. Sessions and
// connections whose sysfs files cannot be read are logged and skipped, while a
// host whose device directory cannot be listed aborts the scan with an error.
func (handler *deviceHandler) GetISCSIPortalHostMapForTarget(targetIqn string) (map[string]int, error) {
	fs := handler.getIo
	portals := make(map[string]int)

	// Iterate over all the iSCSI hosts in sysfs
	hostRoot := "/sys/class/iscsi_host"
	hosts, err := fs.ReadDir(hostRoot)
	if err != nil {
		if os.IsNotExist(err) {
			return portals, nil
		}
		return nil, err
	}

	for _, host := range hosts {
		// iSCSI hosts are always of the format "host%d"
		// See drivers/scsi/hosts.c in Linux
		hostName := host.Name()
		if !strings.HasPrefix(hostName, "host") {
			continue
		}
		hostNumber, err := strconv.Atoi(strings.TrimPrefix(hostName, "host"))
		if err != nil {
			klog.Errorf("Could not get number from iSCSI host: %s", hostName)
			continue
		}

		// The session we are after is a child of the iscsi_host device.
		devicePath := hostRoot + "/" + hostName + "/device"
		children, err := fs.ReadDir(devicePath)
		if err != nil {
			return nil, err
		}

		for _, child := range children {
			// Sessions are of the format "session%u"
			// See drivers/scsi/scsi_transport_iscsi.c in Linux
			sessionName := child.Name()
			if !strings.HasPrefix(sessionName, "session") {
				continue
			}
			sessionPath := devicePath + "/" + sessionName

			// Read the target name for the iSCSI session
			rawTarget, err := fs.ReadFile(sessionPath + "/iscsi_session/" + sessionName + "/targetname")
			if err != nil {
				klog.Infof("Failed to process session %s, assuming this session is unavailable: %s", sessionName, err)
				continue
			}

			// Ignore hosts that don't match the target we were looking for.
			if strings.TrimSpace(string(rawTarget)) != targetIqn {
				continue
			}

			// The iSCSI connections are children of the session.
			sessionEntries, err := fs.ReadDir(sessionPath)
			if err != nil {
				klog.Infof("Failed to process session %s, assuming this session is unavailable: %s", sessionName, err)
				continue
			}

			for _, sessionEntry := range sessionEntries {
				// Connections are of the format "connection%d:%u"
				// See drivers/scsi/scsi_transport_iscsi.c in Linux
				connName := sessionEntry.Name()
				if !strings.HasPrefix(connName, "connection") {
					continue
				}
				connPath := sessionPath + "/" + connName + "/iscsi_connection/" + connName

				// Read the current and persistent portal information for the
				// connection, in this order; the first unreadable attribute
				// makes the whole connection count as unavailable.
				attrs := [4]string{"address", "port", "persistent_address", "persistent_port"}
				var values [4]string
				complete := true
				for i, attr := range attrs {
					raw, err := fs.ReadFile(connPath + "/" + attr)
					if err != nil {
						klog.Infof("Failed to process connection %s, assuming this connection is unavailable: %s", connName, err)
						complete = false
						break
					}
					values[i] = strings.TrimSpace(string(raw))
				}
				if !complete {
					continue
				}

				// Add entries to the map for both the current and persistent portals
				// pointing to the SCSI host for those connections
				// JoinHostPort will add `[]` around IPv6 addresses.
				portals[net.JoinHostPort(values[0], values[1])] = hostNumber
				portals[net.JoinHostPort(values[2], values[3])] = hostNumber
			}
		}
	}

	return portals, nil
}
// FindDevicesForISCSILun walks the iSCSI hosts in sysfs and returns the names
// of the block devices that correspond to the given lun of the target with the
// given iqn. Sessions of other targets and sessions that do not expose the LUN
// are skipped; a failure to list a directory or to read a session's target
// name aborts the scan with that error.
func (handler *deviceHandler) FindDevicesForISCSILun(targetIqn string, lun int) ([]string, error) {
	fs := handler.getIo
	found := make([]string, 0)

	// Iterate over all the iSCSI hosts in sysfs
	hostRoot := "/sys/class/iscsi_host"
	hosts, err := fs.ReadDir(hostRoot)
	if err != nil {
		return nil, err
	}

	for _, host := range hosts {
		// iSCSI hosts are always of the format "host%d"
		// See drivers/scsi/hosts.c in Linux
		hostName := host.Name()
		if !strings.HasPrefix(hostName, "host") {
			continue
		}
		hostNumber, err := strconv.Atoi(strings.TrimPrefix(hostName, "host"))
		if err != nil {
			klog.Errorf("Could not get number from iSCSI host: %s", hostName)
			continue
		}

		// The session we are after is a child of the iscsi_host device.
		devicePath := hostRoot + "/" + hostName + "/device"
		children, err := fs.ReadDir(devicePath)
		if err != nil {
			return nil, err
		}

		for _, child := range children {
			// Sessions are of the format "session%u"
			// See drivers/scsi/scsi_transport_iscsi.c in Linux
			sessionName := child.Name()
			if !strings.HasPrefix(sessionName, "session") {
				continue
			}
			sessionPath := devicePath + "/" + sessionName

			// Read the target name for the iSCSI session
			rawTarget, err := fs.ReadFile(sessionPath + "/iscsi_session/" + sessionName + "/targetname")
			if err != nil {
				return nil, err
			}

			// Only sessions of the requested target are of interest.
			if strings.TrimSpace(string(rawTarget)) != targetIqn {
				continue
			}

			// The list of block devices on the scsi bus will be in a
			// directory called "target%d:%d:%d".
			// See drivers/scsi/scsi_scan.c in Linux
			// We assume the channel/bus and device/controller are always zero for iSCSI
			//
			// The block device for a given lun will be "%d:%d:%d:%d" --
			// host:channel:bus:LUN
			lunPath := fmt.Sprintf("%s/target%d:0:0/%d:0:0:%d", sessionPath, hostNumber, hostNumber, lun)

			// If the LUN doesn't exist on this bus, continue on
			if _, err := fs.Lstat(lunPath); err != nil {
				continue
			}

			// Read the block directory, there should only be one child --
			// the block device "sd*"
			blockEntries, err := fs.ReadDir(lunPath + "/block")
			if err != nil {
				return nil, err
			}
			if len(blockEntries) > 0 {
				found = append(found, blockEntries[0].Name())
			}
		}
	}

	return found, nil
}

View File

@ -0,0 +1,43 @@
//go:build !linux
// +build !linux
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
// FindMultipathDeviceForDevice is the stub compiled on non-Linux builds. It
// ignores device and always returns the empty string.
func (handler *deviceHandler) FindMultipathDeviceForDevice(device string) string {
return ""
}
// FindSlaveDevicesOnMultipath is the stub compiled on non-Linux builds. It
// ignores disk and always returns an empty, non-nil slice.
func (handler *deviceHandler) FindSlaveDevicesOnMultipath(disk string) []string {
	return []string{}
}
// GetISCSIPortalHostMapForTarget is the stub compiled on non-Linux builds. It
// ignores targetIqn and always returns an empty, non-nil map with a nil error.
func (handler *deviceHandler) GetISCSIPortalHostMapForTarget(targetIqn string) (map[string]int, error) {
	return map[string]int{}, nil
}
// FindDevicesForISCSILun is the stub compiled on non-Linux builds. It ignores
// both arguments and always returns an empty, non-nil slice with a nil error.
func (handler *deviceHandler) FindDevicesForISCSILun(targetIqn string, lun int) ([]string, error) {
	return []string{}, nil
}

18
e2e/vendor/k8s.io/kubernetes/pkg/volume/util/doc.go generated vendored Normal file
View File

@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package util contains utility code for use by volume plugins.
package util // import "k8s.io/kubernetes/pkg/volume/util"

View File

@ -0,0 +1,28 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
// Names of the protection finalizers for PVCs, PVs and VACs.
const (
// PVCProtectionFinalizer is the name of the finalizer on PVCs that have a running pod.
PVCProtectionFinalizer = "kubernetes.io/pvc-protection"
// PVProtectionFinalizer is the name of the finalizer on PVs that are bound by PVCs.
PVProtectionFinalizer = "kubernetes.io/pv-protection"
// VACProtectionFinalizer is the name of the finalizer on VACs that are used by PVs or PVCs.
VACProtectionFinalizer = "kubernetes.io/vac-protection"
)

148
e2e/vendor/k8s.io/kubernetes/pkg/volume/util/fs/fs.go generated vendored Normal file
View File

@ -0,0 +1,148 @@
//go:build linux || darwin
// +build linux darwin
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fs
import (
"fmt"
"os"
"path/filepath"
"syscall"
"time"
"golang.org/x/sys/unix"
servermetrics "k8s.io/kubernetes/pkg/kubelet/server/metrics"
"k8s.io/kubernetes/pkg/volume/util/fsquota"
)
// UsageInfo is the result type of DiskUsage.
type UsageInfo struct {
// Bytes is the disk space consumed, in bytes.
Bytes int64
// Inodes is the number of inodes consumed.
Inodes int64
}
// Info reports statfs data for the filesystem that path resides upon. The
// results are, in order: available bytes, byte capacity, bytes used, total
// inodes, free inodes and used inodes. If statfs fails, every number is zero
// and the error is returned.
func Info(path string) (int64, int64, int64, int64, int64, int64, error) {
	var st unix.Statfs_t
	if err := unix.Statfs(path, &st); err != nil {
		return 0, 0, 0, 0, 0, 0, err
	}

	blockSize := int64(st.Bsize)
	totalBlocks := int64(st.Blocks)

	// Byte figures are block counts scaled by the block size.
	available := int64(st.Bavail) * blockSize
	capacity := totalBlocks * blockSize
	usage := (totalBlocks - int64(st.Bfree)) * blockSize

	inodes := int64(st.Files)
	inodesFree := int64(st.Ffree)

	return available, capacity, usage, inodes, inodesFree, inodes - inodesFree, nil
}
// DiskUsage calculates the number of inodes and disk usage for a given directory.
//
// It first asks fsquota for the byte consumption and the inode count of path.
// If both are available they are returned directly. Otherwise the tree under
// path is walked and whichever of the two values fsquota did not supply is
// accumulated from the per-entry stat data. An empty path is rejected with an
// error.
func DiskUsage(path string) (UsageInfo, error) {
var usage UsageInfo
if path == "" {
return usage, fmt.Errorf("invalid directory")
}
// First check whether the quota system knows about this directory
// A nil quantity or error means that the path does not support quotas
// or xfs_quota tool is missing and we should use other mechanisms.
startTime := time.Now()
consumption, _ := fsquota.GetConsumption(path)
if consumption != nil {
usage.Bytes = consumption.Value()
// The deferred call runs when DiskUsage returns; its first argument names
// the mechanism that supplied the byte count.
defer servermetrics.CollectVolumeStatCalDuration("fsquota", startTime)
} else {
defer servermetrics.CollectVolumeStatCalDuration("du", startTime)
}
inodes, _ := fsquota.GetInodes(path)
if inodes != nil {
usage.Inodes = inodes.Value()
}
// Nothing left to compute when the quota system answered both questions.
if inodes != nil && consumption != nil {
return usage, nil
}
// Remember the device of the top-level directory so the walk can stay on it.
topLevelStat := &unix.Stat_t{}
err := unix.Stat(path, topLevelStat)
if err != nil {
return usage, err
}
// dedupedInode stores inodes that could be duplicates (nlink > 1)
dedupedInodes := make(map[uint64]struct{})
err = filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
// ignore files that have been deleted after directory was read
if os.IsNotExist(err) {
return nil
}
if err != nil {
return fmt.Errorf("unable to count inodes for %s: %s", path, err)
}
// according to the docs, Sys can be nil
if info.Sys() == nil {
return fmt.Errorf("fileinfo Sys is nil")
}
s, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("unsupported fileinfo; could not convert to stat_t")
}
if s.Dev != topLevelStat.Dev {
// don't descend into directories on other devices
// NOTE(review): per the filepath.Walk documentation, returning SkipDir
// for a non-directory entry skips the rest of its parent directory.
return filepath.SkipDir
}
// Dedupe hardlinks
if s.Nlink > 1 {
if _, ok := dedupedInodes[s.Ino]; !ok {
dedupedInodes[s.Ino] = struct{}{}
} else {
// This inode was already counted through another link.
return nil
}
}
// Only accumulate the values the quota system could not provide.
if consumption == nil {
usage.Bytes += int64(s.Blocks) * int64(512) // blocksize in bytes
}
if inodes == nil {
usage.Inodes++
}
return nil
})
return usage, err
}

View File

@ -0,0 +1,40 @@
//go:build !linux && !darwin && !windows
// +build !linux,!darwin,!windows
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fs
import (
"fmt"
)
// UsageInfo is the result type of DiskUsage. On this build DiskUsage always
// fails, so only the zero value is ever returned.
type UsageInfo struct {
// Bytes is the disk space consumed, in bytes.
Bytes int64
// Inodes is the number of inodes consumed.
Inodes int64
}
// Info is the stub for platforms without filesystem statistics support. It
// ignores path and always returns zero for every value together with an error.
func Info(path string) (int64, int64, int64, int64, int64, int64, error) {
	err := fmt.Errorf("fsinfo not supported for this build")
	return 0, 0, 0, 0, 0, 0, err
}
// DiskUsage is the stub for platforms without disk usage support. It ignores
// path and always returns a zero UsageInfo together with an error.
func DiskUsage(path string) (UsageInfo, error) {
	return UsageInfo{}, fmt.Errorf("directory disk usage not supported for this build.")
}

View File

@ -0,0 +1,117 @@
//go:build windows
// +build windows
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fs
import (
"os"
"path/filepath"
"syscall"
"unsafe"
"golang.org/x/sys/windows"
)
var (
// modkernel32 is the lazily loaded handle for kernel32.dll.
modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
// procGetDiskFreeSpaceEx is the GetDiskFreeSpaceExW procedure of kernel32.dll; Info calls it.
procGetDiskFreeSpaceEx = modkernel32.NewProc("GetDiskFreeSpaceExW")
)
// UsageInfo is the result type of DiskUsage. On this build DiskUsage only
// fills in Bytes; Inodes is left at zero.
type UsageInfo struct {
// Bytes is the disk space consumed, in bytes.
Bytes int64
// Inodes is the number of inodes consumed.
Inodes int64
}
// Info returns (available bytes, byte capacity, byte usage, total inodes, inodes free, inode usage, error)
// for the filesystem that path resides upon.
func Info(path string) (int64, int64, int64, int64, int64, int64, error) {
var freeBytesAvailable, totalNumberOfBytes, totalNumberOfFreeBytes int64
var err error
// The equivalent linux call supports calls against files but the syscall for windows
// fails for files with error code: The directory name is invalid. (#99173)
// https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
// By always ensuring the directory path we meet all uses cases of this function
path = filepath.Dir(path)
ret, _, err := syscall.Syscall6(
procGetDiskFreeSpaceEx.Addr(),
4,
uintptr(unsafe.Pointer(syscall.StringToUTF16Ptr(path))),
uintptr(unsafe.Pointer(&freeBytesAvailable)),
uintptr(unsafe.Pointer(&totalNumberOfBytes)),
uintptr(unsafe.Pointer(&totalNumberOfFreeBytes)),
0,
0,
)
if ret == 0 {
return 0, 0, 0, 0, 0, 0, err
}
return freeBytesAvailable, totalNumberOfBytes, totalNumberOfBytes - freeBytesAvailable, 0, 0, 0, nil
}
// DiskUsage gets the disk usage of the specified path. Only the Bytes field of
// the result is populated. If path cannot be lstat'ed the zero UsageInfo and
// the error are returned; an error from the directory walk is returned
// together with the bytes counted so far.
func DiskUsage(path string) (UsageInfo, error) {
	info, err := os.Lstat(path)
	if err != nil {
		return UsageInfo{}, err
	}
	total, err := diskUsage(path, info)
	return UsageInfo{Bytes: total}, err
}
// diskUsage returns the number of bytes used by currPath, described by info.
//
// A symlink passed in directly counts as zero. A non-directory counts as its
// own size. A directory counts as its own size plus the size of every entry;
// sub-directories are visited recursively, all other entries (including
// symlinks found inside a directory) contribute their reported size. On error
// the bytes counted so far are returned together with the error.
func diskUsage(currPath string, info os.FileInfo) (int64, error) {
	switch {
	case info.Mode()&os.ModeSymlink != 0:
		return 0, nil
	case !info.IsDir():
		return info.Size(), nil
	}

	total := info.Size()
	handle, err := os.Open(currPath)
	if err != nil {
		return total, err
	}
	defer handle.Close()

	entries, err := handle.Readdir(-1)
	if err != nil {
		return total, err
	}
	for _, entry := range entries {
		if !entry.IsDir() {
			total += entry.Size()
			continue
		}
		sub, err := diskUsage(filepath.Join(currPath, entry.Name()), entry)
		if err != nil {
			// The partial size of the failed sub-directory is not added.
			return total, err
		}
		total += sub
	}
	return total, nil
}

View File

@ -0,0 +1,28 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package common
// QuotaID is a generic quota identifier.
// Data type based on quotactl(2).
type QuotaID int32
// Sentinel QuotaID values.
const (
// UnknownQuotaID -- cannot determine whether a quota is in force
UnknownQuotaID QuotaID = -1
// BadQuotaID -- Invalid quota
BadQuotaID QuotaID = 0
)

View File

@ -0,0 +1,90 @@
//go:build linux
// +build linux
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package common
import (
"regexp"
)
// QuotaType -- type of quota to be applied
type QuotaType int
// The QuotaType values are distinct single bits (1 and 2).
const (
// FSQuotaAccounting for quotas for accounting only
FSQuotaAccounting QuotaType = 1 << iota
// FSQuotaEnforcing for quotas for enforcement
FSQuotaEnforcing QuotaType = 1 << iota
)
// FirstQuota is the quota ID we start with.
// XXXXXXX Need a better way of doing this...
var FirstQuota QuotaID = 1048577
// MountsFile is the location of the system mount data
var MountsFile = "/proc/self/mounts"
// MountParseRegexp parses one line of MountsFile. It captures the first three
// fields of the line, each a run of non-space characters; the second capture
// is the one compared against the mount point by runXFSQuotaCommand.
var MountParseRegexp = regexp.MustCompilePOSIX("^([^ ]*)[ \t]*([^ ]*)[ \t]*([^ ]*)") // Ignore options etc.
// LinuxVolumeQuotaProvider returns an appropriate quota applier
// object if we can support quotas on this device.
type LinuxVolumeQuotaProvider interface {
// GetQuotaApplier retrieves an object that can apply
// quotas (or nil if this provider cannot support quotas
// on the device). mountpoint is the mount point to inspect and
// backingDev names the device behind it.
GetQuotaApplier(mountpoint string, backingDev string) LinuxVolumeQuotaApplier
}
// LinuxVolumeQuotaApplier is a generic interface to any quota
// mechanism supported by Linux.
type LinuxVolumeQuotaApplier interface {
// GetQuotaOnDir gets the quota ID (if any) that applies to
// this directory.
GetQuotaOnDir(path string) (QuotaID, error)
// SetQuotaOnDir applies the specified quota ID to a directory.
// Negative value for bytes means that a non-enforcing quota
// should be applied (perhaps by setting a quota too large to
// be hit).
SetQuotaOnDir(path string, id QuotaID, bytes int64) error
// QuotaIDIsInUse determines whether the quota ID is in use.
// Implementations should not check /etc/project or /etc/projid,
// only whether their underlying mechanism already has the ID in
// use.
// Return value of false with no error means that the ID is not
// in use; true means that it is already in use. An error
// return means that any quota ID will fail.
QuotaIDIsInUse(id QuotaID) (bool, error)
// GetConsumption returns the consumption (in bytes) of the
// directory, determined by the implementation's quota-based
// mechanism. If it is unable to do so using that mechanism,
// it should return an error and allow higher layers to
// enumerate the directory.
GetConsumption(path string, id QuotaID) (int64, error)
// GetInodes returns the number of inodes used by the
// directory, determined by the implementation's quota-based
// mechanism. If it is unable to do so using that mechanism,
// it should return an error and allow higher layers to
// enumerate the directory.
GetInodes(path string, id QuotaID) (int64, error)
}

View File

@ -0,0 +1,286 @@
//go:build linux
// +build linux
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package common
import (
"bufio"
"fmt"
"os"
"os/exec"
"regexp"
"strconv"
"strings"
"sync"
"syscall"
"k8s.io/klog/v2"
)
// quotaCmd caches the xfs_quota path found by getXFSQuotaCmd; it stays empty
// when no program was found.
var quotaCmd string
// quotaCmdInitialized records that the search in getXFSQuotaCmd already ran.
var quotaCmdInitialized bool
// quotaCmdLock is held by getXFSQuotaCmd while it reads or updates the two
// variables above.
var quotaCmdLock sync.RWMutex
// linuxFilesystemType describes one filesystem type on which quotas are
// supported.
//
// If we later get a filesystem that uses project quota semantics other than
// XFS, we'll need to change this.
// Higher levels don't need to know what's inside
type linuxFilesystemType struct {
// name is a human-readable label for the filesystem type.
name string
typeMagic int64 // Filesystem magic number, per statfs(2)
// maxQuota is the largest byte limit SetQuotaOnDir will request; larger or
// negative requests are replaced by this value.
maxQuota int64
allowEmptyOutput bool // Accept empty output from "quota" command
}
const (
// bitsPerWord is the width of uint on the build target: the shift amount
// is 1 when uint has 64 bits and 0 otherwise.
bitsPerWord = 32 << (^uint(0) >> 63) // either 32 or 64
)
var (
// linuxSupportedFilesystems lists the filesystem types GetQuotaApplier
// tries, in order.
linuxSupportedFilesystems = []linuxFilesystemType{
{
name: "XFS",
typeMagic: 0x58465342,
// Largest positive value of a signed word.
maxQuota: 1<<(bitsPerWord-1) - 1,
allowEmptyOutput: true, // XFS filesystems report nothing if a quota is not present
}, {
name: "ext4fs",
typeMagic: 0xef53,
// As above, additionally capped at 2^58 - 1.
maxQuota: (1<<(bitsPerWord-1) - 1) & (1<<58 - 1),
allowEmptyOutput: false, // ext4 filesystems always report something even if a quota is not present
},
}
)
// VolumeProvider supplies a quota applier to the generic code. It carries no
// state; its GetQuotaApplier method does the work.
type VolumeProvider struct {
}
// quotaCmds lists the locations probed for the xfs_quota program, in order.
var quotaCmds = []string{"/sbin/xfs_quota",
"/usr/sbin/xfs_quota",
"/bin/xfs_quota"}
// quotaParseRegexp skips the first blank-delimited field of the quota output
// and captures the run of digits that follows it.
var quotaParseRegexp = regexp.MustCompilePOSIX("^[^ \t]*[ \t]*([0-9]+)")
// lsattrCmd is the lsattr program run by GetQuotaOnDir.
var lsattrCmd = "/usr/bin/lsattr"
// lsattrParseRegexp captures the leading number (used as the project ID) and,
// after one skipped field, the rest of the line (used as the path).
var lsattrParseRegexp = regexp.MustCompilePOSIX("^ *([0-9]+) [^ ]+ (.*)$")
// GetQuotaApplier -- does this backing device support quotas that
// can be applied to directories?
//
// The supported filesystem types are tried in order; the first one that
// matches mountpoint yields an applier configured with that type's limits.
// nil is returned when none matches.
func (*VolumeProvider) GetQuotaApplier(mountpoint string, backingDev string) LinuxVolumeQuotaApplier {
	for i := range linuxSupportedFilesystems {
		candidate := &linuxSupportedFilesystems[i]
		if !isFilesystemOfType(mountpoint, backingDev, candidate.typeMagic) {
			continue
		}
		return linuxVolumeQuotaApplier{
			mountpoint:       mountpoint,
			maxQuota:         candidate.maxQuota,
			allowEmptyOutput: candidate.allowEmptyOutput,
		}
	}
	return nil
}
// linuxVolumeQuotaApplier is the applier returned by GetQuotaApplier; its
// methods drive the xfs_quota and lsattr programs.
type linuxVolumeQuotaApplier struct {
// mountpoint is the mount point passed to xfs_quota.
mountpoint string
// maxQuota caps the byte limit applied by SetQuotaOnDir.
maxQuota int64
// allowEmptyOutput makes empty "quota" output count as zero instead of an error.
allowEmptyOutput bool
}
// getXFSQuotaCmd returns the path of the xfs_quota program.
//
// The candidate locations in quotaCmds are probed only on the first call; the
// outcome is cached under quotaCmdLock. When no usable program exists, every
// call (not just the first) returns an error, so callers never receive an
// empty command path with a nil error.
func getXFSQuotaCmd() (string, error) {
	quotaCmdLock.Lock()
	defer quotaCmdLock.Unlock()
	if quotaCmdInitialized {
		// A cached empty path means the earlier search found nothing.
		if quotaCmd == "" {
			return "", fmt.Errorf("no xfs_quota program found")
		}
		return quotaCmd, nil
	}
	for _, program := range quotaCmds {
		fileinfo, err := os.Stat(program)
		// 1<<6 is the owner-execute permission bit (0100).
		if err == nil && fileinfo.Mode().Perm()&(1<<6) != 0 {
			klog.V(3).Infof("Found xfs_quota program %s", program)
			quotaCmd = program
			quotaCmdInitialized = true
			return quotaCmd, nil
		}
	}
	quotaCmdInitialized = true
	return "", fmt.Errorf("no xfs_quota program found")
}
// doRunXFSQuotaCommand runs xfs_quota with the given sub-command against
// mountpoint, using mountsFile as the mount table, and returns the program's
// standard output. An error is returned when no xfs_quota program is
// available or when the program fails.
func doRunXFSQuotaCommand(mountpoint string, mountsFile, command string) (string, error) {
	program, err := getXFSQuotaCmd()
	if err != nil {
		return "", err
	}
	// We're using numeric project IDs directly; no need to scan
	// /etc/projects or /etc/projid
	klog.V(4).Infof("runXFSQuotaCommand %s -t %s -P/dev/null -D/dev/null -x -f %s -c %s", program, mountsFile, mountpoint, command)
	args := []string{"-t", mountsFile, "-P/dev/null", "-D/dev/null", "-x", "-f", mountpoint, "-c", command}
	out, err := exec.Command(program, args...).Output()
	if err != nil {
		return "", err
	}
	result := string(out)
	klog.V(4).Infof("runXFSQuotaCommand output %q", result)
	return result, nil
}
// runXFSQuotaCommand runs the xfs_quota sub-command against mountpoint and
// returns its output.
//
// Extract the mountpoint we care about into a temporary mounts file so that xfs_quota does
// not attempt to scan every mount on the filesystem, which could hang if e. g.
// a stuck NFS mount is present.
// See https://bugzilla.redhat.com/show_bug.cgi?id=237120 for an example
// of the problem that could be caused if this were to happen.
//
// An error is returned when the temporary file cannot be created or written,
// when MountsFile cannot be opened or read, when mountpoint is not listed in
// MountsFile, or when xfs_quota itself fails.
func runXFSQuotaCommand(mountpoint string, command string) (string, error) {
	tmpMounts, err := os.CreateTemp("", "mounts")
	if err != nil {
		return "", fmt.Errorf("cannot create temporary mount file: %v", err)
	}
	tmpMountsFileName := tmpMounts.Name()
	defer tmpMounts.Close()
	defer os.Remove(tmpMountsFileName)

	mounts, err := os.Open(MountsFile)
	if err != nil {
		return "", fmt.Errorf("cannot open mounts file %s: %v", MountsFile, err)
	}
	defer mounts.Close()

	scanner := bufio.NewScanner(mounts)
	for scanner.Scan() {
		line := scanner.Text()
		match := MountParseRegexp.FindStringSubmatch(line)
		// The second capture is compared against the requested mount point.
		if match == nil || match[2] != mountpoint {
			continue
		}
		if _, err := tmpMounts.WriteString(line + "\n"); err != nil {
			return "", fmt.Errorf("cannot write temporary mounts file: %v", err)
		}
		if err := tmpMounts.Sync(); err != nil {
			return "", fmt.Errorf("cannot sync temporary mounts file: %v", err)
		}
		return doRunXFSQuotaCommand(mountpoint, tmpMountsFileName, command)
	}
	// Scan also stops on a read error or an over-long line; report that
	// instead of claiming that the mount point does not exist.
	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("cannot read mounts file %s: %v", MountsFile, err)
	}
	return "", fmt.Errorf("cannot run xfs_quota: cannot find mount point %s in %s", mountpoint, MountsFile)
}
// SupportsQuotas determines whether the filesystem supports quotas.
//
// It runs the xfs_quota "state -p" command on mountpoint and looks for
// "Enforcement: ON" when qType is FSQuotaEnforcing, or "Accounting: ON" for
// any other qType. A failure to run the command is returned as the error.
func SupportsQuotas(mountpoint string, qType QuotaType) (bool, error) {
	state, err := runXFSQuotaCommand(mountpoint, "state -p")
	if err != nil {
		return false, err
	}
	marker := "Accounting: ON"
	if qType == FSQuotaEnforcing {
		marker = "Enforcement: ON"
	}
	return strings.Contains(state, marker), nil
}
// isFilesystemOfType reports whether the filesystem mounted at mountpoint has
// the magic number typeMagic and has quota accounting switched on. A statfs
// failure is logged and treated as "no". backingDev is not used.
func isFilesystemOfType(mountpoint string, backingDev string, typeMagic int64) bool {
	var st syscall.Statfs_t
	if err := syscall.Statfs(mountpoint, &st); err != nil {
		klog.Warningf("Warning: Unable to statfs %s: %v", mountpoint, err)
		return false
	}
	if int64(st.Type) != typeMagic {
		return false
	}
	// An error from SupportsQuotas comes with a false answer, so dropping it
	// simply reports the filesystem as unsupported.
	supported, _ := SupportsQuotas(mountpoint, FSQuotaAccounting)
	return supported
}
// GetQuotaOnDir retrieves the quota ID (if any) associated with the specified directory
// If we can't make system calls, all we can say is that we don't know whether
// it has a quota, and higher levels have to make the call.
//
// The ID is read from the output of "lsattr -pd path". BadQuotaID and an error
// are returned when lsattr fails, when its output cannot be parsed, or when
// the path it echoes differs from the one requested.
func (v linuxVolumeQuotaApplier) GetQuotaOnDir(path string) (QuotaID, error) {
	out, err := exec.Command(lsattrCmd, "-pd", path).Output()
	if err != nil {
		return BadQuotaID, fmt.Errorf("cannot run lsattr: %v", err)
	}
	text := string(out)
	fields := lsattrParseRegexp.FindStringSubmatch(text)
	switch {
	case fields == nil:
		return BadQuotaID, fmt.Errorf("unable to parse lsattr -pd %s output %s", path, text)
	case fields[2] != path:
		return BadQuotaID, fmt.Errorf("mismatch between supplied and returned path (%s != %s)", path, fields[2])
	}
	projid, err := strconv.ParseInt(fields[1], 10, 32)
	if err != nil {
		return BadQuotaID, fmt.Errorf("unable to parse project ID from %s (%v)", fields[1], err)
	}
	return QuotaID(projid), nil
}
// SetQuotaOnDir applies a quota to the specified directory under the specified mountpoint.
//
// A negative byte count, or one above the applier's maximum, is replaced by
// the maximum. The limit is set for project id first; only if that succeeds
// is path assigned to the project.
func (v linuxVolumeQuotaApplier) SetQuotaOnDir(path string, id QuotaID, bytes int64) error {
	limit := bytes
	if limit < 0 || limit > v.maxQuota {
		limit = v.maxQuota
	}
	limitCmd := fmt.Sprintf("limit -p bhard=%v bsoft=%v %v", limit, limit, id)
	if _, err := runXFSQuotaCommand(v.mountpoint, limitCmd); err != nil {
		return err
	}
	projectCmd := fmt.Sprintf("project -s -p %s %v", path, id)
	_, err := runXFSQuotaCommand(v.mountpoint, projectCmd)
	return err
}
// getQuantity runs the xfs_quota "quota" command for project id on mountpoint
// with the extra flag xfsQuotaArg, parses the number it reports and returns
// that number times multiplier. Empty output yields 0 when allowEmptyOutput is
// set; otherwise unparsable output is an error.
func getQuantity(mountpoint string, id QuotaID, xfsQuotaArg string, multiplier int64, allowEmptyOutput bool) (int64, error) {
	query := fmt.Sprintf("quota -p -N -n -v %s %v", xfsQuotaArg, id)
	output, err := runXFSQuotaCommand(mountpoint, query)
	if err != nil {
		return 0, fmt.Errorf("unable to run xfs_quota: %v", err)
	}
	if allowEmptyOutput && output == "" {
		return 0, nil
	}
	fields := quotaParseRegexp.FindStringSubmatch(output)
	if fields == nil {
		return 0, fmt.Errorf("unable to parse quota output '%s'", output)
	}
	raw, err := strconv.ParseInt(fields[1], 10, 64)
	if err != nil {
		return 0, fmt.Errorf("unable to parse data size '%s' from '%s': %v", fields[1], output, err)
	}
	klog.V(4).Infof("getQuantity %s %d %s %d => %d %v", mountpoint, id, xfsQuotaArg, multiplier, raw, err)
	return raw * multiplier, nil
}
// GetConsumption returns the consumption in bytes if available via quotas.
// The path argument is ignored; the applier's own mountpoint is queried.
func (v linuxVolumeQuotaApplier) GetConsumption(_ string, id QuotaID) (int64, error) {
	const (
		blocksFlag   = "-b"
		bytesPerUnit = 1024 // the reported figure is multiplied by this to obtain bytes
	)
	return getQuantity(v.mountpoint, id, blocksFlag, bytesPerUnit, v.allowEmptyOutput)
}
// GetInodes returns the inodes in use if available via quotas.
// The path argument is ignored; the applier's own mountpoint is queried.
func (v linuxVolumeQuotaApplier) GetInodes(_ string, id QuotaID) (int64, error) {
	const (
		inodesFlag = "-i"
		unscaled   = 1 // the reported figure is already an inode count
	)
	return getQuantity(v.mountpoint, id, inodesFlag, unscaled, v.allowEmptyOutput)
}
// QuotaIDIsInUse checks whether the specified quota ID is in use on the specified
// filesystem. An ID counts as in use when it has a non-zero byte consumption
// or, failing that, a non-zero inode count.
func (v linuxVolumeQuotaApplier) QuotaIDIsInUse(id QuotaID) (bool, error) {
	used, err := v.GetConsumption(v.mountpoint, id)
	switch {
	case err != nil:
		return false, err
	case used > 0:
		return true, nil
	}
	count, err := v.GetInodes(v.mountpoint, id)
	return count > 0, err
}

View File

@ -0,0 +1,361 @@
//go:build linux
// +build linux
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fsquota
import (
"bufio"
"fmt"
"os"
"path/filepath"
"regexp"
"strconv"
"sync"
"golang.org/x/sys/unix"
"k8s.io/kubernetes/pkg/volume/util/fsquota/common"
)
// projectsFile holds lines of the form "<id>:<directory>".
var projectsFile = "/etc/projects"
// projidFile holds lines of the form "<name>:<id>".
var projidFile = "/etc/projid"
// projectsParseRegexp captures the numeric ID and the remainder of a
// projectsFile line.
var projectsParseRegexp = regexp.MustCompilePOSIX("^([[:digit:]]+):(.*)$")
// projidParseRegexp captures the name and the numeric ID of a projidFile
// line; lines starting with '#' do not match.
var projidParseRegexp = regexp.MustCompilePOSIX("^([^#][^:]*):([[:digit:]]+)$")
// quotaIDLock is held by createProjectID and removeProjectID while they work
// on the project files.
var quotaIDLock sync.RWMutex
const maxUnusedQuotasToSearch = 128 // Don't go into an infinite loop searching for an unused quota
// projectType is one line of the projects or projid file. Lines that could not
// be parsed are kept too, with id set to common.BadQuotaID and empty data.
type projectType struct {
isValid bool // False if we need to remove this line
// id is the quota ID parsed from the line.
id common.QuotaID
data string // Project name (projid) or directory (projects)
// line is the original text of the line, written back verbatim.
line string
}
// projectsList is the parsed content of both project files.
type projectsList struct {
// projects holds the lines of the projects file (ID to directory).
projects []projectType
// projid holds the lines of the projid file (name to ID).
projid []projectType
}
// projFilesAreOK verifies that neither the projects file nor the projid file
// exists as something other than a regular file. A file that cannot be
// lstat'ed is accepted. The projects file is examined first.
func projFilesAreOK() error {
	for _, name := range []string{projectsFile, projidFile} {
		if info, err := os.Lstat(name); err == nil && !info.Mode().IsRegular() {
			return fmt.Errorf("%s exists but is not a plain file, cannot continue", name)
		}
	}
	return nil
}
// lockFile takes an exclusive flock(2) lock on file.
func lockFile(file *os.File) error {
	fd := int(file.Fd())
	return unix.Flock(fd, unix.LOCK_EX)
}
// unlockFile releases the flock(2) lock held on file.
func unlockFile(file *os.File) error {
	fd := int(file.Fd())
	return unix.Flock(fd, unix.LOCK_UN)
}
// openAndLockProjectFiles opens /etc/projects and /etc/projid locked.
// Creates them if they don't exist.
//
// On success the projects file and the projid file are returned, in that
// order, both holding an exclusive lock. On any failure both files are closed
// again and nil, nil and an error are returned.
func openAndLockProjectFiles() (*os.File, *os.File, error) {
	// Make sure neither project-related file is a symlink!
	if err := projFilesAreOK(); err != nil {
		return nil, nil, fmt.Errorf("system project files failed verification: %v", err)
	}
	// We don't actually modify the original files; we create temporaries and
	// move them over the originals
	fProjects, err := os.OpenFile(projectsFile, os.O_RDONLY|os.O_CREATE, 0644)
	if err != nil {
		return nil, nil, fmt.Errorf("unable to open %s: %v", projectsFile, err)
	}
	fProjid, err := os.OpenFile(projidFile, os.O_RDONLY|os.O_CREATE, 0644)
	if err != nil {
		fProjects.Close()
		return nil, nil, fmt.Errorf("unable to open %s: %v", projidFile, err)
	}

	// abandon closes both files and hands back the failure result.
	abandon := func(cause error) (*os.File, *os.File, error) {
		fProjid.Close()
		fProjects.Close()
		return nil, nil, cause
	}

	// Check once more, to ensure nothing got changed out from under us
	if err := projFilesAreOK(); err != nil {
		return abandon(fmt.Errorf("system project files failed re-verification: %v", err))
	}
	if err := lockFile(fProjects); err != nil {
		return abandon(fmt.Errorf("unable to lock %s: %v", projectsFile, err))
	}
	if err := lockFile(fProjid); err != nil {
		// Nothing useful we can do if we get an error here
		wrapped := fmt.Errorf("unable to lock %s: %v", projidFile, err)
		unlockFile(fProjects)
		return abandon(wrapped)
	}
	return fProjects, fProjid, nil
}
// closeProjectFiles closes both project files; nil arguments are skipped. The
// projid file is closed first. Both closes are always attempted; the error
// from the projid file takes precedence over the one from the projects file.
func closeProjectFiles(fProjects *os.File, fProjid *os.File) error {
	// Nothing useful we can do if either of these fail,
	// but we have to close (and thereby unlock) the files anyway.
	var projidErr, projectsErr error
	if fProjid != nil {
		projidErr = fProjid.Close()
	}
	if fProjects != nil {
		projectsErr = fProjects.Close()
	}
	if projidErr != nil {
		return projidErr
	}
	return projectsErr
}
// parseProject parses one "<id>:<directory>" line of the projects file. A line
// that does not match, or whose ID is not a valid integer, is kept as an
// entry with common.BadQuotaID and empty data.
func parseProject(l string) projectType {
	unparsed := projectType{isValid: true, id: common.BadQuotaID, data: "", line: l}
	fields := projectsParseRegexp.FindStringSubmatch(l)
	if fields == nil {
		return unparsed
	}
	id, err := strconv.Atoi(fields[1])
	if err != nil {
		return unparsed
	}
	return projectType{isValid: true, id: common.QuotaID(id), data: fields[2], line: l}
}
// parseProjid parses one "<name>:<id>" line of the projid file. A line that
// does not match, or whose ID is not a valid integer, is kept as an entry
// with common.BadQuotaID and empty data.
func parseProjid(l string) projectType {
	unparsed := projectType{isValid: true, id: common.BadQuotaID, data: "", line: l}
	fields := projidParseRegexp.FindStringSubmatch(l)
	if fields == nil {
		return unparsed
	}
	id, err := strconv.Atoi(fields[2])
	if err != nil {
		return unparsed
	}
	return projectType{isValid: true, id: common.QuotaID(id), data: fields[1], line: l}
}
// parseProjFile reads f line by line and returns the result of applying parser
// to each line, in file order. The result is nil when f yields no lines.
func parseProjFile(f *os.File, parser func(l string) projectType) []projectType {
	var parsed []projectType
	for lines := bufio.NewScanner(f); lines.Scan(); {
		parsed = append(parsed, parser(lines.Text()))
	}
	return parsed
}
// readProjectFiles parses the projects file and then the projid file into a
// projectsList.
func readProjectFiles(projects *os.File, projid *os.File) projectsList {
	return projectsList{
		projects: parseProjFile(projects, parseProject),
		projid:   parseProjFile(projid, parseProjid),
	}
}
// findAvailableQuota finds the next available quota ID, counting up from
// common.FirstQuota. IDs present in idMap are skipped without being probed.
// Every other candidate is checked with the applier's QuotaIDIsInUse: an
// error aborts the search, and the first ID reported as not in use is
// returned. The search gives up once more than maxUnusedQuotasToSearch probed
// candidates turned out to be in use.
func findAvailableQuota(path string, idMap map[common.QuotaID]bool) (common.QuotaID, error) {
	rejected := 0
	for candidate := common.FirstQuota; rejected <= maxUnusedQuotasToSearch; candidate++ {
		if _, taken := idMap[candidate]; taken {
			continue
		}
		inUse, err := getApplier(path).QuotaIDIsInUse(candidate)
		if err != nil {
			return common.BadQuotaID, err
		}
		if !inUse {
			return candidate, nil
		}
		rejected++
	}
	return common.BadQuotaID, fmt.Errorf("cannot find available quota ID")
}
// addDirToProject records path under project id in list.
//
// If path is already listed, its existing ID is returned unchanged; asking for
// a different, valid ID is an error. Otherwise, when id is common.BadQuotaID a
// free ID is chosen with findAvailableQuota. A projects entry is appended for
// path, and a projid entry named "volume<id>" is appended unless the requested
// ID already had one.
//
// It returns the ID in effect, whether a projid entry was added, and an error.
func addDirToProject(path string, id common.QuotaID, list *projectsList) (common.QuotaID, bool, error) {
	known := make(map[common.QuotaID]bool)
	for i := range list.projects {
		entry := &list.projects[i]
		if entry.data != path {
			known[entry.id] = true
			continue
		}
		if id != common.BadQuotaID && id != entry.id {
			return common.BadQuotaID, false, fmt.Errorf("attempt to reassign project ID for %s", path)
		}
		// Trying to reassign a directory to the project it's
		// already in. Maybe this should be an error, but for
		// now treat it as an idempotent operation
		return entry.id, false, nil
	}

	needToAddProjid := true
	for i := range list.projid {
		existing := list.projid[i].id
		known[existing] = true
		if id != common.BadQuotaID && existing == id {
			needToAddProjid = false
		}
	}

	if id == common.BadQuotaID {
		fresh, err := findAvailableQuota(path, known)
		if err != nil {
			return common.BadQuotaID, false, err
		}
		id = fresh
		needToAddProjid = true
	}

	if needToAddProjid {
		name := fmt.Sprintf("volume%v", id)
		list.projid = append(list.projid, projectType{
			isValid: true,
			id:      id,
			data:    name,
			line:    fmt.Sprintf("%s:%v", name, id),
		})
	}
	list.projects = append(list.projects, projectType{
		isValid: true,
		id:      id,
		data:    path,
		line:    fmt.Sprintf("%v:%s", id, path),
	})
	return id, needToAddProjid, nil
}
// removeDirFromProject marks the projects entry for path as invalid so that it
// is dropped when the file is rewritten. If no other projects entry uses the
// same ID, the matching projid entries are marked invalid as well.
//
// The boolean result reports whether projid entries were invalidated. An error
// is returned when id is common.BadQuotaID, when path is listed under a
// different ID, when path is listed more than once, or when path is not listed
// at all. Note that on the "listed more than once" error the first matching
// entry has already been marked invalid in list.
func removeDirFromProject(path string, id common.QuotaID, list *projectsList) (bool, error) {
if id == common.BadQuotaID {
return false, fmt.Errorf("attempt to remove invalid quota ID from %s", path)
}
foundAt := -1
// countByID counts the projects entries per ID, including the one being removed.
countByID := make(map[common.QuotaID]int)
for i, project := range list.projects {
if project.data == path {
if id != project.id {
return false, fmt.Errorf("attempting to remove quota ID %v from path %s, but expecting ID %v", id, path, project.id)
} else if foundAt != -1 {
return false, fmt.Errorf("found multiple quota IDs for path %s", path)
}
// Faster and easier than deleting an element
list.projects[i].isValid = false
foundAt = i
}
countByID[project.id]++
}
if foundAt == -1 {
return false, fmt.Errorf("cannot find quota associated with path %s", path)
}
if countByID[id] <= 1 {
// Removing the last entry means that we're no longer using
// the quota ID, so remove that as well
for i, projid := range list.projid {
if projid.id == id {
list.projid[i].isValid = false
}
}
return true, nil
}
return false, nil
}
// writeProjectFile writes the valid entries of projects to a new temporary
// file created next to base, with the same permission bits as base, and
// returns the temporary file's name. The caller is expected to rename it over
// the original.
//
// On any failure after the temporary file has been created, the file is closed
// and removed again, so no descriptor or stray file is left behind.
func writeProjectFile(base *os.File, projects []projectType) (string, error) {
	oname := base.Name()
	stat, err := base.Stat()
	if err != nil {
		return "", err
	}
	mode := stat.Mode() & os.ModePerm
	f, err := os.CreateTemp(filepath.Dir(oname), filepath.Base(oname))
	if err != nil {
		return "", err
	}
	filename := f.Name()
	// discard cleans up the temporary file on a failure path.
	discard := func() {
		f.Close()
		os.Remove(filename)
	}
	if err := os.Chmod(filename, mode); err != nil {
		discard()
		return "", err
	}
	for _, proj := range projects {
		// Entries marked invalid are the ones being deleted.
		if !proj.isValid {
			continue
		}
		if _, err := fmt.Fprintf(f, "%s\n", proj.line); err != nil {
			discard()
			return "", err
		}
	}
	if err := f.Close(); err != nil {
		os.Remove(filename)
		return "", err
	}
	return filename, nil
}
// writeProjectFiles rewrites the projects file and, when writeProjid is true,
// the projid file from list. Each file is first written to a temporary file
// and then renamed over the original.
//
// The projects temp file is written before anything is renamed. If writing or
// renaming the projid file fails, the projects file is left untouched, the
// projects temp file is removed, and the error is returned.
func writeProjectFiles(fProjects *os.File, fProjid *os.File, writeProjid bool, list projectsList) error {
	tmpProjects, err := writeProjectFile(fProjects, list.projects)
	if err != nil {
		return fmt.Errorf("unable to write project files: %w", err)
	}
	// Ensure that both files are written before we try to rename either.
	if writeProjid {
		// Assign to the outer err (no :=) so a projid failure is not lost.
		var tmpProjid string
		tmpProjid, err = writeProjectFile(fProjid, list.projid)
		if err == nil {
			if err = os.Rename(tmpProjid, fProjid.Name()); err != nil {
				os.Remove(tmpProjid)
			}
		}
	}
	if err == nil {
		if err = os.Rename(tmpProjects, fProjects.Name()); err == nil {
			return nil
		}
		// We're in a bit of trouble here; at this point we may have
		// successfully renamed tmpProjid to the real thing, but renaming
		// tmpProjects to the real file failed. There's not much we can do in
		// this position. Anything we could do to try to undo it would itself
		// be likely to fail.
	}
	os.Remove(tmpProjects)
	return fmt.Errorf("unable to write project files: %w", err)
}
// createProjectID registers path in the project files under quota ID and
// returns the ID that was recorded. The chosen ID comes from addDirToProject;
// passing common.BadQuotaID asks for a new project ID to be generated if the
// directory is not already in a project.
//
// The whole read-modify-write cycle runs under quotaIDLock with the project
// files opened and locked. On any failure common.BadQuotaID is returned.
func createProjectID(path string, ID common.QuotaID) (common.QuotaID, error) {
	quotaIDLock.Lock()
	defer quotaIDLock.Unlock()

	fProjects, fProjid, err := openAndLockProjectFiles()
	if err != nil {
		return common.BadQuotaID, fmt.Errorf("createProjectID %s %v failed %v", path, ID, err)
	}
	defer closeProjectFiles(fProjects, fProjid)

	list := readProjectFiles(fProjects, fProjid)
	var needProjid bool
	ID, needProjid, err = addDirToProject(path, ID, &list)
	if err == nil && ID != common.BadQuotaID {
		err = writeProjectFiles(fProjects, fProjid, needProjid, list)
		if err == nil {
			return ID, nil
		}
	}
	return common.BadQuotaID, fmt.Errorf("createProjectID %s %v failed %v", path, ID, err)
}
// removeProjectID drops path's entry for quota ID from the project files and
// rewrites them. The projid file is rewritten only when removeDirFromProject
// reports that projid entries were invalidated.
//
// The whole read-modify-write cycle runs under quotaIDLock with the project
// files opened and locked. Passing common.BadQuotaID is an error.
func removeProjectID(path string, ID common.QuotaID) error {
	if ID == common.BadQuotaID {
		return fmt.Errorf("attempting to remove invalid quota ID %v", ID)
	}
	quotaIDLock.Lock()
	defer quotaIDLock.Unlock()

	fProjects, fProjid, err := openAndLockProjectFiles()
	if err != nil {
		return fmt.Errorf("removeProjectID %s %v failed %v", path, ID, err)
	}
	defer closeProjectFiles(fProjects, fProjid)

	list := readProjectFiles(fProjects, fProjid)
	dropProjid, err := removeDirFromProject(path, ID, &list)
	if err == nil {
		err = writeProjectFiles(fProjects, fProjid, dropProjid, list)
	}
	if err != nil {
		return fmt.Errorf("removeProjectID %s %v failed %v", path, ID, err)
	}
	return nil
}

View File

@ -0,0 +1,55 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fsquota
import (
"k8s.io/mount-utils"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/volume/util/fsquota/common"
)
// Interface -- quota interface
// Interface lists the quota operations available for a directory path.
type Interface interface {
// GetQuotaOnDir gets the quota ID (if any) that applies to
// this directory
GetQuotaOnDir(m mount.Interface, path string) (common.QuotaID, error)
// SupportsQuotas reports whether the path provided supports quotas.
SupportsQuotas(m mount.Interface, path string) (bool, error)
// AssignQuota assigns a quota (picked by the quota mechanism) to a path
// on behalf of the given pod UID. The quota ID is not returned; only an
// error is.
AssignQuota(m mount.Interface, path string, poduid types.UID, bytes *resource.Quantity) error
// GetConsumption gets the quota-based storage consumption for the path
GetConsumption(path string) (*resource.Quantity, error)
// GetInodes gets the quota-based inode consumption for the path
GetInodes(path string) (*resource.Quantity, error)
// ClearQuota removes the quota from a path.
// Implementations may assume that any data covered by the
// quota has already been removed.
ClearQuota(m mount.Interface, path string) error
}
// enabledQuotasForMonitoring reports whether the
// LocalStorageCapacityIsolationFSQuotaMonitoring feature gate is enabled.
func enabledQuotasForMonitoring() bool {
	gate := utilfeature.DefaultFeatureGate
	return gate.Enabled(features.LocalStorageCapacityIsolationFSQuotaMonitoring)
}

View File

@ -0,0 +1,476 @@
//go:build linux
// +build linux
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fsquota
import (
"bufio"
"fmt"
"os"
"path/filepath"
"sync"
"k8s.io/klog/v2"
"k8s.io/mount-utils"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/kubernetes/pkg/volume/util/fsquota/common"
)
// Pod -> External Pod UID
// Keyed by the internal pod UID that AssignQuota generates per directory;
// the value is the pod UID supplied by the caller.
var podUidMap = make(map[types.UID]types.UID)
// Pod -> ID
var podQuotaMap = make(map[types.UID]common.QuotaID)
// Dir -> ID (for convenience)
var dirQuotaMap = make(map[string]common.QuotaID)
// ID -> pod
var quotaPodMap = make(map[common.QuotaID]types.UID)
// Directory -> pod
var dirPodMap = make(map[string]types.UID)
// Backing device -> applier
// This is *not* cleaned up; its size will be bounded.
// NOTE(review): SupportsQuotas indexes this map with the mountpoint returned
// by getFSInfo, not with the backing device -- confirm which is intended.
var devApplierMap = make(map[string]common.LinuxVolumeQuotaApplier)
// Directory -> applier
var dirApplierMap = make(map[string]common.LinuxVolumeQuotaApplier)
// dirApplierLock is taken by getApplier, setApplier and clearApplier around
// their access to dirApplierMap.
var dirApplierLock sync.RWMutex
// Pod -> refcount
var podDirCountMap = make(map[types.UID]int)
// ID -> size
var quotaSizeMap = make(map[common.QuotaID]int64)
// quotaLock is held by AssignQuota, ClearQuota, GetConsumption and GetInodes
// for their whole duration.
var quotaLock sync.RWMutex
// Directory -> quotas supported. SupportsQuotas only ever stores true here.
var supportsQuotasMap = make(map[string]bool)
// supportsQuotasLock is held by SupportsQuotas after its feature checks.
var supportsQuotasLock sync.RWMutex
// Directory -> backingDev
var backingDevMap = make(map[string]string)
var backingDevLock sync.RWMutex
// Directory -> mountpoint
var mountpointMap = make(map[string]string)
var mountpointLock sync.RWMutex
// providers are consulted in order by SupportsQuotas; the first one that
// returns a non-nil applier wins.
var providers = []common.LinuxVolumeQuotaProvider{
&common.VolumeProvider{},
}
// detectBackingDevInternal scans the mounts file line by line and returns the
// device of the first line whose mount point equals mountpoint. Lines are
// parsed with common.MountParseRegexp; lines that do not match are skipped.
//
// It is separated from detectBackingDev for ease of testing (the mounts file
// path is a parameter).
//
// An error is returned when the file cannot be opened, when reading it fails
// part-way (for example a line exceeding the scanner's token limit), or when
// no line matches mountpoint.
func detectBackingDevInternal(mountpoint string, mounts string) (string, error) {
	file, err := os.Open(mounts)
	if err != nil {
		return "", err
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		match := common.MountParseRegexp.FindStringSubmatch(scanner.Text())
		if match == nil {
			continue
		}
		// Named mountedAt rather than mount to avoid shadowing the imported
		// mount package.
		device, mountedAt := match[1], match[2]
		if mountedAt == mountpoint {
			return device, nil
		}
	}
	// Scan returns false on both EOF and failure; tell the two apart so a read
	// error is not misreported as "not found".
	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("reading %s to find backing device for %s: %w", mounts, mountpoint, err)
	}
	return "", fmt.Errorf("couldn't find backing device for %s", mountpoint)
}
// detectBackingDev assumes that the mount point provided is valid
// It looks the mount point up in common.MountsFile via
// detectBackingDevInternal; the mount.Interface argument is unused.
func detectBackingDev(_ mount.Interface, mountpoint string) (string, error) {
return detectBackingDevInternal(mountpoint, common.MountsFile)
}
// clearBackingDev forgets the cached backing device for path.
func clearBackingDev(path string) {
	backingDevLock.Lock()
	delete(backingDevMap, path)
	backingDevLock.Unlock()
}
// detectMountpointInternal walks from path up through its parent directories
// and returns the first one that m.IsLikelyNotMountPoint does not rule out as
// a mount point. If the walk reaches "/" (or an empty path), "/" is returned.
//
// Assumes that the path has been fully canonicalized.
// Breaking this up helps with testing.
func detectMountpointInternal(m mount.Interface, path string) (string, error) {
	for dir := path; dir != "" && dir != "/"; dir = filepath.Dir(dir) {
		// per k8s.io/mount-utils/mount_linux this detects all but
		// a bind mount from one part of a mount to another.
		// For our purposes that's fine; we simply want the "true"
		// mount point
		//
		// IsNotMountPoint proved much more troublesome; it actually
		// scans the mounts, and when a lot of mount/unmount
		// activity takes place, it is not able to get a consistent
		// view of /proc/self/mounts, causing it to time out and
		// report incorrectly.
		notMountPoint, err := m.IsLikelyNotMountPoint(dir)
		switch {
		case err != nil:
			return "/", err
		case !notMountPoint:
			return dir, nil
		}
	}
	return "/", nil
}
// detectMountpoint makes path absolute, resolves its symlinks and then asks
// detectMountpointInternal for the enclosing mount point. On any error the
// returned mount point is "/".
func detectMountpoint(m mount.Interface, path string) (string, error) {
	absPath, err := filepath.Abs(path)
	if err != nil {
		return "/", err
	}
	resolved, err := filepath.EvalSymlinks(absPath)
	if err != nil {
		return "/", err
	}
	mountpoint, err := detectMountpointInternal(m, resolved)
	if err != nil {
		return "/", err
	}
	return mountpoint, nil
}
// clearMountpoint forgets the cached mount point for path.
func clearMountpoint(path string) {
	mountpointLock.Lock()
	delete(mountpointMap, path)
	mountpointLock.Unlock()
}
// getFSInfo returns the mountpoint and backing device for path, in that
// order. Values already cached for path are reused; anything missing is
// detected. Both values are stored in the caches only after both are known,
// so a failed lookup caches nothing.
//
// mountpointLock and backingDevLock are held for the whole call.
func getFSInfo(m mount.Interface, path string) (string, string, error) {
	mountpointLock.Lock()
	defer mountpointLock.Unlock()
	backingDevLock.Lock()
	defer backingDevLock.Unlock()

	mountpoint, haveMountpoint := mountpointMap[path]
	if !haveMountpoint {
		detected, err := detectMountpoint(m, path)
		if err != nil {
			return "", "", fmt.Errorf("cannot determine mountpoint for %s: %v", path, err)
		}
		mountpoint = detected
	}

	backingDev, haveBackingDev := backingDevMap[path]
	if !haveBackingDev {
		detected, err := detectBackingDev(m, mountpoint)
		if err != nil {
			return "", "", fmt.Errorf("cannot determine backing device for %s: %v", path, err)
		}
		backingDev = detected
	}

	mountpointMap[path] = mountpoint
	backingDevMap[path] = backingDev
	return mountpoint, backingDev, nil
}
// clearFSInfo drops the cached mountpoint and backing device for path.
// Each helper takes and releases its own lock.
func clearFSInfo(path string) {
clearMountpoint(path)
clearBackingDev(path)
}
// getApplier returns the quota applier recorded for path, or nil when none
// has been recorded.
func getApplier(path string) common.LinuxVolumeQuotaApplier {
	dirApplierLock.Lock()
	applier := dirApplierMap[path]
	dirApplierLock.Unlock()
	return applier
}
// setApplier records applier as the quota applier for path.
func setApplier(path string, applier common.LinuxVolumeQuotaApplier) {
	dirApplierLock.Lock()
	dirApplierMap[path] = applier
	dirApplierLock.Unlock()
}
// clearApplier forgets the quota applier recorded for path.
func clearApplier(path string) {
	dirApplierLock.Lock()
	delete(dirApplierMap, path)
	dirApplierLock.Unlock()
}
// setQuotaOnDir applies quota id with the given byte limit to path through
// the applier recorded for path. It returns an error, rather than panicking
// on a nil interface, when no applier has been recorded for path.
func setQuotaOnDir(path string, id common.QuotaID, bytes int64) error {
	applier := getApplier(path)
	if applier == nil {
		return fmt.Errorf("no quota applier available for %s", path)
	}
	return applier.SetQuotaOnDir(path, id, bytes)
}
// GetQuotaOnDir returns the quota ID that the applier recorded for path
// reports for the directory.
//
// common.BadQuotaID and an error are returned when the filesystem
// information for path cannot be determined or when no applier has been
// recorded for path (previously the latter case panicked on a nil interface).
func GetQuotaOnDir(m mount.Interface, path string) (common.QuotaID, error) {
	if _, _, err := getFSInfo(m, path); err != nil {
		return common.BadQuotaID, err
	}
	applier := getApplier(path)
	if applier == nil {
		return common.BadQuotaID, fmt.Errorf("no quota applier available for %s", path)
	}
	return applier.GetQuotaOnDir(path)
}
// clearQuotaOnDir resets any quota found on path and removes the path's
// project ID from the system project files.
//
// It returns nil when quotas are not supported for path (including when the
// support check itself fails) and when no valid quota ID can be read from the
// directory. Otherwise it returns the error from resetting the quota or, if
// that succeeded, the error from removeProjectID.
func clearQuotaOnDir(m mount.Interface, path string, userNamespacesEnabled bool) error {
// Since we may be called without path being in the map,
// we explicitly have to check in this case.
klog.V(4).Infof("clearQuotaOnDir %s", path)
supportsQuotas, err := SupportsQuotas(m, path, userNamespacesEnabled)
if err != nil {
// Log-and-continue instead of returning an error for now
// due to unspecified backwards compatibility concerns (a subject to revise)
klog.V(3).Infof("Attempt to check for quota support failed: %v", err)
}
if !supportsQuotas {
return nil
}
projid, err := GetQuotaOnDir(m, path)
if err == nil && projid != common.BadQuotaID {
// There is a quota on the directory; reset it by calling
// setQuotaOnDir with 0 bytes. If that fails we have a quota we
// can't clear, which is not good: the error is logged here and
// returned after the cleanup below.
err = setQuotaOnDir(path, projid, 0)
if err != nil {
klog.V(3).Infof("Attempt to clear quota failed: %v", err)
}
// Even if clearing the quota failed, we still need to
// try to remove the project ID, or that may be left dangling.
err1 := removeProjectID(path, projid)
if err1 != nil {
klog.V(3).Infof("Attempt to remove quota ID from system files failed: %v", err1)
}
clearFSInfo(path)
// The quota-reset error takes precedence over the project-ID error.
if err != nil {
return err
}
return err1
}
// If we couldn't get a quota, that's fine -- there may
// never have been one, and we have no way to know otherwise
klog.V(3).Infof("clearQuotaOnDir fails %v", err)
return nil
}
// SupportsQuotas -- Does the path support quotas
// Cache the applier for paths that support quotas. For paths that don't,
// don't cache the result because nothing will clean it up.
// However, do cache the device->applier map; the number of devices
// is bounded.
// User namespaces prevent changes to project IDs on the filesystem,
// ensuring xfs-quota metrics' reliability; hence, userNamespacesEnabled is checked.
//
// It returns false with a nil error when the monitoring feature gate is off,
// when userNamespacesEnabled is false, or when no provider offers an applier
// for the path's filesystem. An error is returned only when the filesystem
// information for path cannot be determined.
func SupportsQuotas(m mount.Interface, path string, userNamespacesEnabled bool) (bool, error) {
	if !enabledQuotasForMonitoring() {
		klog.V(3).Info("SupportsQuotas called, but quotas disabled")
		return false, nil
	}
	if !userNamespacesEnabled {
		klog.V(3).Info("SupportQuotas called and LocalStorageCapacityIsolationFSQuotaMonitoring enabled, but pod is not in a user namespace")
		return false, nil
	}
	supportsQuotasLock.Lock()
	defer supportsQuotasLock.Unlock()
	if supportsQuotas, ok := supportsQuotasMap[path]; ok {
		return supportsQuotas, nil
	}
	// Named mountpoint rather than mount to avoid shadowing the imported
	// mount package.
	mountpoint, dev, err := getFSInfo(m, path)
	if err != nil {
		return false, err
	}
	// Do we know about this device?
	applier, ok := devApplierMap[mountpoint]
	if !ok {
		for _, provider := range providers {
			if applier = provider.GetQuotaApplier(mountpoint, dev); applier != nil {
				devApplierMap[mountpoint] = applier
				break
			}
		}
	}
	if applier != nil {
		supportsQuotasMap[path] = true
		setApplier(path, applier)
		return true, nil
	}
	// Quotas are unsupported here: drop what getFSInfo cached for path.
	// clearFSInfo takes mountpointLock and backingDevLock itself, so the
	// maps are not modified without their locks.
	clearFSInfo(path)
	return false, nil
}
// AssignQuota -- assign a quota to the specified directory.
// AssignQuota chooses the quota ID based on the pod UID and path.
// If the pod UID is identical to another one known, it may (but presently
// doesn't) choose the same quota ID as other volumes in the pod.
//
// bytes must be non-nil. A positive request is recorded in the bookkeeping
// maps but applied to the directory as -1. Calling it again for a directory
// that already has a quota succeeds only for the same pod UID and the same
// size. On failure to apply the quota, the project ID that was just created
// is removed again; a failure of that rollback is logged.
func AssignQuota(m mount.Interface, path string, poduid types.UID, bytes *resource.Quantity, userNamespacesEnabled bool) error { //nolint:staticcheck
	if bytes == nil {
		return fmt.Errorf("attempting to assign null quota to %s", path)
	}
	ibytes := bytes.Value()
	if ok, err := SupportsQuotas(m, path, userNamespacesEnabled); !ok {
		return fmt.Errorf("quotas not supported on %s: %v", path, err)
	}
	quotaLock.Lock()
	defer quotaLock.Unlock()
	// Current policy is to set individual quotas on each volume,
	// for each new volume we generate a random UUID and we use that as
	// the internal pod uid.
	// From fsquota point of view each volume is attached to a
	// single unique pod.
	// If we decide later that we want to assign one quota for all
	// volumes in a pod, we can simply use poduid parameter directly
	// If and when we decide permanently that we're going to adopt
	// one quota per volume, we can rip all of the pod code out.
	externalPodUid := poduid
	internalPodUid, ok := dirPodMap[path]
	if ok {
		if podUidMap[internalPodUid] != externalPodUid {
			return fmt.Errorf("requesting quota on existing directory %s but different pod %s %s", path, podUidMap[internalPodUid], externalPodUid)
		}
	} else {
		internalPodUid = types.UID(uuid.NewUUID())
	}
	oid, ok := podQuotaMap[internalPodUid]
	if ok {
		if quotaSizeMap[oid] != ibytes {
			return fmt.Errorf("requesting quota of different size: old %v new %v", quotaSizeMap[oid], bytes)
		}
		// Same directory, same pod, same size: nothing left to do.
		if _, ok := dirPodMap[path]; ok {
			return nil
		}
	} else {
		oid = common.BadQuotaID
	}
	id, err := createProjectID(path, oid)
	if err == nil {
		if oid != common.BadQuotaID && oid != id {
			return fmt.Errorf("attempt to reassign quota %v to %v", oid, id)
		}
		// When enforcing quotas are enabled, we'll condition this
		// on their being disabled also.
		fsbytes := ibytes
		if fsbytes > 0 {
			fsbytes = -1
		}
		if err = setQuotaOnDir(path, id, fsbytes); err == nil {
			quotaPodMap[id] = internalPodUid
			quotaSizeMap[id] = ibytes
			podQuotaMap[internalPodUid] = id
			dirQuotaMap[path] = id
			dirPodMap[path] = internalPodUid
			podUidMap[internalPodUid] = externalPodUid
			podDirCountMap[internalPodUid]++
			klog.V(4).Infof("Assigning quota ID %d (request limit %d, actual limit %d) to %s", id, ibytes, fsbytes, path)
			return nil
		}
		// Roll back the project ID we just created. The original failure is
		// what gets returned, but a failed rollback leaves a dangling ID in
		// the project files, so make it visible in the log.
		if rmErr := removeProjectID(path, id); rmErr != nil {
			klog.V(3).Infof("Attempt to remove quota ID %v for %s after failed quota assignment failed: %v", id, path, rmErr)
		}
	}
	return fmt.Errorf("assign quota FAILED %v", err)
}
// GetConsumption -- retrieve the consumption (in bytes) of the directory.
// It returns (nil, nil) when no applier is recorded for path.
func GetConsumption(path string) (*resource.Quantity, error) {
	// Note that we actually need to hold the lock at least through
	// running the quota command, so it can't get recycled behind our back
	quotaLock.Lock()
	defer quotaLock.Unlock()

	quotaApplier := getApplier(path)
	if quotaApplier == nil {
		// No applier means directory is not under quota management
		return nil, nil
	}
	used, err := quotaApplier.GetConsumption(path, dirQuotaMap[path])
	if err == nil {
		return resource.NewQuantity(used, resource.DecimalSI), nil
	}
	return nil, err
}
// GetInodes -- retrieve the number of inodes in use under the directory.
// It returns (nil, nil) when no applier is recorded for path.
func GetInodes(path string) (*resource.Quantity, error) {
	// Note that we actually need to hold the lock at least through
	// running the quota command, so it can't get recycled behind our back
	quotaLock.Lock()
	defer quotaLock.Unlock()

	quotaApplier := getApplier(path)
	if quotaApplier == nil {
		// No applier means directory is not under quota management
		return nil, nil
	}
	count, err := quotaApplier.GetInodes(path, dirQuotaMap[path])
	if err == nil {
		return resource.NewQuantity(count, resource.DecimalSI), nil
	}
	return nil, err
}
// ClearQuota -- remove the quota assigned to a directory
//
// It fails immediately when the monitoring feature gate is disabled. For a
// directory with no bookkeeping entry it clears whatever quota is found on
// disk. For a tracked directory it verifies that the on-disk quota ID matches
// the recorded one, then either clears the quota (last directory of the
// internal pod) or only removes the project ID entry, and finally drops the
// per-directory bookkeeping. quotaLock is held throughout.
func ClearQuota(m mount.Interface, path string, userNamespacesEnabled bool) error {
klog.V(3).Infof("ClearQuota %s", path)
if !enabledQuotasForMonitoring() {
return fmt.Errorf("clearQuota called, but quotas disabled")
}
quotaLock.Lock()
defer quotaLock.Unlock()
poduid, ok := dirPodMap[path]
if !ok {
// Nothing in the map either means that there was no
// quota to begin with or that we're clearing a
// stale directory, so if we find a quota, just remove it.
// The process of clearing the quota requires that an applier
// be found, which needs to be cleaned up.
defer delete(supportsQuotasMap, path)
defer clearApplier(path)
return clearQuotaOnDir(m, path, userNamespacesEnabled)
}
_, ok = podQuotaMap[poduid]
if !ok {
return fmt.Errorf("clearQuota: No quota available for %s", path)
}
projid, err := GetQuotaOnDir(m, path)
if err != nil {
// Log-and-continue instead of returning an error for now
// due to unspecified backwards compatibility concerns (a subject to revise)
klog.V(3).Infof("Attempt to check quota ID %v on dir %s failed: %v", dirQuotaMap[path], path, err)
}
// Refuse to touch a directory whose on-disk quota ID differs from the
// one recorded at assignment time.
if projid != dirQuotaMap[path] {
return fmt.Errorf("expected quota ID %v on dir %s does not match actual %v", dirQuotaMap[path], path, projid)
}
count, ok := podDirCountMap[poduid]
if count <= 1 || !ok {
// Last (or untracked) directory for this internal pod: clear the quota
// itself and forget everything recorded for the pod.
err = clearQuotaOnDir(m, path, userNamespacesEnabled)
// This error should be noted; we still need to clean up
// and otherwise handle in the same way.
if err != nil {
klog.V(3).Infof("Unable to clear quota %v %s: %v", dirQuotaMap[path], path, err)
}
delete(quotaSizeMap, podQuotaMap[poduid])
delete(quotaPodMap, podQuotaMap[poduid])
delete(podDirCountMap, poduid)
delete(podQuotaMap, poduid)
delete(podUidMap, poduid)
} else {
// Other directories still share the quota: only remove this
// directory's project ID entry and decrement the refcount.
err = removeProjectID(path, projid)
podDirCountMap[poduid]--
klog.V(4).Infof("Not clearing quota for pod %s; still %v dirs outstanding", poduid, podDirCountMap[poduid])
}
// Per-directory bookkeeping is dropped regardless of err.
delete(dirPodMap, path)
delete(dirQuotaMap, path)
delete(supportsQuotasMap, path)
clearApplier(path)
if err != nil {
return fmt.Errorf("unable to clear quota for %s: %v", path, err)
}
return nil
}

View File

@ -0,0 +1,64 @@
//go:build !linux
// +build !linux
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fsquota
import (
"errors"
"k8s.io/kubernetes/pkg/volume/util/fsquota/common"
"k8s.io/mount-utils"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
)
// Dummy quota implementation for systems that do not implement support
// for volume quotas

// errNotImplemented is the error returned by the dummy functions below.
var errNotImplemented = errors.New("not implemented")
// GetQuotaOnDir -- dummy implementation; always returns common.BadQuotaID
// and errNotImplemented.
func GetQuotaOnDir(_ mount.Interface, _ string) (common.QuotaID, error) {
return common.BadQuotaID, errNotImplemented
}
// SupportsQuotas -- dummy implementation; always returns false and
// errNotImplemented.
func SupportsQuotas(_ mount.Interface, _ string, _ bool) (bool, error) {
return false, errNotImplemented
}
// AssignQuota -- dummy implementation; always returns errNotImplemented.
func AssignQuota(_ mount.Interface, _ string, _ types.UID, _ *resource.Quantity, _ bool) error {
return errNotImplemented
}
// GetConsumption -- dummy implementation; always returns nil and
// errNotImplemented.
func GetConsumption(_ string) (*resource.Quantity, error) {
return nil, errNotImplemented
}
// GetInodes -- dummy implementation; always returns nil and
// errNotImplemented.
func GetInodes(_ string) (*resource.Quantity, error) {
return nil, errNotImplemented
}
// ClearQuota -- dummy implementation; always returns errNotImplemented.
func ClearQuota(_ mount.Interface, _ string, _ bool) error {
return errNotImplemented
}

View File

@ -0,0 +1,125 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hostutil
import (
"errors"
"os"
"sync"
"k8s.io/mount-utils"
)
// FakeHostUtil is a fake HostUtils implementation for testing
type FakeHostUtil struct {
// MountPoints is the in-memory mount table consulted by DeviceOpened.
MountPoints []mount.MountPoint
// Filesystem maps a pathname to its file type; it backs GetFileType and
// PathExists.
Filesystem map[string]FileType
// mutex is held by DeviceOpened while it reads MountPoints.
mutex sync.Mutex
}
// NewFakeHostUtil builds a FakeHostUtil, which implements the HostUtils
// interface for testing, backed by the given pathname -> file type map.
// MountPoints is left empty.
// TODO: no callers were initializing the struct with any MountPoints. Check
// if those are still being used by any callers and if MountPoints still need
// to be a part of the struct.
func NewFakeHostUtil(fs map[string]FileType) *FakeHostUtil {
	fake := new(FakeHostUtil)
	fake.Filesystem = fs
	return fake
}
// Compile-time check to make sure *FakeHostUtil implements the HostUtils
// interface.
var _ HostUtils = &FakeHostUtil{}
// DeviceOpened reports whether the block device referenced by pathname is in
// use, which for the fake means it appears as the Device of an entry in the
// in-memory mountpoint table. The error is always nil.
func (hu *FakeHostUtil) DeviceOpened(pathname string) (bool, error) {
	hu.mutex.Lock()
	defer hu.mutex.Unlock()

	for i := range hu.MountPoints {
		if hu.MountPoints[i].Device == pathname {
			return true, nil
		}
	}
	return false, nil
}
// PathIsDevice always returns true with a nil error, whatever pathname is.
func (hu *FakeHostUtil) PathIsDevice(pathname string) (bool, error) {
return true, nil
}
// GetDeviceNameFromMount given a mount point, find the volume id
// It delegates to the package-level getDeviceNameFromMount.
func (hu *FakeHostUtil) GetDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error) {
return getDeviceNameFromMount(mounter, mountPath, pluginMountDir)
}
// MakeRShared would check if path is shared and bind-mount it as rshared if
// needed. The fake does nothing and always returns nil.
func (hu *FakeHostUtil) MakeRShared(path string) error {
return nil
}
// GetFileType returns the file type recorded for pathname in the Filesystem
// map. Paths that are not in the map are reported as "Directory". The error
// is always nil.
func (hu *FakeHostUtil) GetFileType(pathname string) (FileType, error) {
	fileType, known := hu.Filesystem[pathname]
	if !known {
		fileType = FileType("Directory")
	}
	return fileType, nil
}
// PathExists reports whether pathname has an entry in the Filesystem map.
// The error is always nil.
func (hu *FakeHostUtil) PathExists(pathname string) (bool, error) {
	_, known := hu.Filesystem[pathname]
	return known, nil
}
// EvalHostSymlinks would return the path name after evaluating symlinks.
// The fake performs no evaluation and returns pathname unchanged.
func (hu *FakeHostUtil) EvalHostSymlinks(pathname string) (string, error) {
return pathname, nil
}
// GetOwner would return the integer ID for the user and group of the given
// path. It is not implemented for testing: it always returns -1, -1 and an
// error.
func (hu *FakeHostUtil) GetOwner(pathname string) (int64, int64, error) {
return -1, -1, errors.New("GetOwner not implemented")
}
// GetSELinuxSupport would test if pathname is on a mount that supports
// SELinux. The fake always reports false with a nil error.
func (hu *FakeHostUtil) GetSELinuxSupport(pathname string) (bool, error) {
return false, nil
}
// GetMode would return the permissions of pathname. It is not implemented
// for testing: it always returns 0 and an error.
func (hu *FakeHostUtil) GetMode(pathname string) (os.FileMode, error) {
return 0, errors.New("not implemented")
}
// GetSELinuxMountContext would return the value of the -o context=XYZ mount
// option on the given mount point. The fake always returns an empty string
// and a nil error.
func (hu *FakeHostUtil) GetSELinuxMountContext(pathname string) (string, error) {
// This pretends the OS does not support SELinux.
return "", nil
}

View File

@ -0,0 +1,117 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hostutil
import (
"fmt"
"os"
"k8s.io/mount-utils"
)
// FileType enumerates the known set of possible file types.
// It is a string type; the recognised values are the constants below.
type FileType string
const (
// FileTypeBlockDev defines a constant for the block device FileType.
FileTypeBlockDev FileType = "BlockDevice"
// FileTypeCharDev defines a constant for the character device FileType.
FileTypeCharDev FileType = "CharDevice"
// FileTypeDirectory defines a constant for the directory FileType.
FileTypeDirectory FileType = "Directory"
// FileTypeFile defines a constant for the file FileType.
FileTypeFile FileType = "File"
// FileTypeSocket defines a constant for the socket FileType.
FileTypeSocket FileType = "Socket"
// FileTypeUnknown defines a constant for an unknown FileType.
// It is the empty string, i.e. the zero value of FileType.
FileTypeUnknown FileType = ""
)
var (
// errUnknownFileType is returned by getFileType when the path is none of
// the recognised file types.
errUnknownFileType = fmt.Errorf("only recognise file, directory, socket, block device and character device")
)
// HostUtils defines the set of methods for interacting with paths on a host.
// HostUtil and FakeHostUtil are checked against it at compile time.
type HostUtils interface {
// DeviceOpened determines if the device (e.g. /dev/sdc) is in use elsewhere
// on the system, i.e. still mounted.
DeviceOpened(pathname string) (bool, error)
// PathIsDevice determines if a path is a device.
PathIsDevice(pathname string) (bool, error)
// GetDeviceNameFromMount finds the device name by checking the mount path
// to get the global mount path within its plugin directory.
// TODO: Remove this method once the rbd and vsphere plugins are removed from in-tree.
GetDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error)
// MakeRShared checks that given path is on a mount with 'rshared' mount
// propagation. If not, it bind-mounts the path as rshared.
MakeRShared(path string) error
// GetFileType checks for file/directory/socket/block/character devices.
GetFileType(pathname string) (FileType, error)
// PathExists tests if the given path already exists
// Error is returned on any other error than "file not found".
PathExists(pathname string) (bool, error)
// EvalHostSymlinks returns the path name after evaluating symlinks.
EvalHostSymlinks(pathname string) (string, error)
// GetOwner returns the integer ID for the user and group of the given path,
// in that order.
GetOwner(pathname string) (int64, int64, error)
// GetSELinuxSupport returns true if given path is on a mount that supports
// SELinux.
GetSELinuxSupport(pathname string) (bool, error)
// GetMode returns permissions of the path.
GetMode(pathname string) (os.FileMode, error)
// GetSELinuxMountContext returns value of -o context=XYZ mount option on
// given mount point.
GetSELinuxMountContext(pathname string) (string, error)
}
// Compile-time check to ensure that *HostUtil satisfies the HostUtils
// interface.
var _ HostUtils = &HostUtil{}
// getFileType stats pathname (following symlinks, as os.Stat does) and maps
// its mode to a FileType: directory, regular file, socket, character device
// or block device.
//
// A missing path yields a "does not exist" error, any other stat failure is
// returned as is, and a mode that matches none of the above yields
// errUnknownFileType. In every error case the returned FileType is empty.
func getFileType(pathname string) (FileType, error) {
	var unknown FileType

	info, err := os.Stat(pathname)
	if err != nil {
		if os.IsNotExist(err) {
			return unknown, fmt.Errorf("path %q does not exist", pathname)
		}
		return unknown, err
	}

	switch mode := info.Mode(); {
	case mode.IsDir():
		return FileTypeDirectory, nil
	case mode.IsRegular():
		return FileTypeFile, nil
	case mode&os.ModeSocket != 0:
		return FileTypeSocket, nil
	case mode&os.ModeDevice != 0:
		// Character devices carry ModeCharDevice in addition to ModeDevice.
		if mode&os.ModeCharDevice != 0 {
			return FileTypeCharDev, nil
		}
		return FileTypeBlockDev, nil
	}
	return unknown, errUnknownFileType
}

View File

@ -0,0 +1,333 @@
//go:build linux
// +build linux
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hostutil
import (
"fmt"
"os"
"path"
"path/filepath"
"strings"
"syscall"
"github.com/opencontainers/selinux/go-selinux"
"golang.org/x/sys/unix"
"k8s.io/klog/v2"
"k8s.io/mount-utils"
utilpath "k8s.io/utils/path"
)
const (
// Location of the mountinfo file
// Passed to DoMakeRShared and findMountInfo by the HostUtil methods.
procMountInfoPath = "/proc/self/mountinfo"
)
// HostUtil implements HostUtils for Linux platforms.
// It has no fields.
type HostUtil struct {
}
// NewHostUtil allocates a HostUtil, the HostUtils implementation for linux
// platforms.
func NewHostUtil() *HostUtil {
	return new(HostUtil)
}
// DeviceOpened checks if block device in use by calling Open with O_EXCL flag.
// It delegates to ExclusiveOpenFailsOnDevice:
// If pathname cannot be stat'ed (including when it does not exist), return
// false with an error.
// If pathname is not a device, log and return false with nil error.
// If open returns errno EBUSY, return true with nil error.
// If open returns nil, return false with nil error.
// Otherwise, return false with error
func (hu *HostUtil) DeviceOpened(pathname string) (bool, error) {
return ExclusiveOpenFailsOnDevice(pathname)
}
// PathIsDevice reports whether pathname refers to a character or block
// device, based on the file type reported by GetFileType. Any error from
// GetFileType is passed through alongside the result.
func (hu *HostUtil) PathIsDevice(pathname string) (bool, error) {
	pathType, err := hu.GetFileType(pathname)
	switch pathType {
	case FileTypeCharDev, FileTypeBlockDev:
		return true, err
	}
	return false, err
}
// ExclusiveOpenFailsOnDevice is shared with NsEnterMounter
//
// It reports whether the device at pathname is in use, determined by trying
// to open it with O_EXCL:
//   - if pathname cannot be stat'ed (including when it does not exist),
//     false and an error are returned;
//   - if pathname is not a device, this is logged and false, nil is returned;
//   - if the open succeeds, the device is not in use: false, nil;
//   - if the open fails with EBUSY, the device is in use: true, nil;
//   - any other open failure is returned as the error.
func ExclusiveOpenFailsOnDevice(pathname string) (bool, error) {
	finfo, err := os.Stat(pathname)
	if err != nil {
		return false, fmt.Errorf(
			"PathIsDevice failed for path %q: %v",
			pathname,
			err)
	}
	if finfo.Mode()&os.ModeDevice == 0 {
		klog.Errorf("Path %q is not referring to a device.", pathname)
		return false, nil
	}
	fd, errno := unix.Open(pathname, unix.O_RDONLY|unix.O_EXCL|unix.O_CLOEXEC, 0)
	if errno == nil {
		// device not in use; the descriptor is only valid on this path, so
		// this is the only place it needs closing.
		_ = unix.Close(fd)
		return false, nil
	}
	if errno == unix.EBUSY {
		// device is in use
		return true, nil
	}
	// error during call to Open
	return false, errno
}
// GetDeviceNameFromMount given a mount point, find the device name from its global mount point
// It delegates to the package-level getDeviceNameFromMount.
func (hu *HostUtil) GetDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error) {
return getDeviceNameFromMount(mounter, mountPath, pluginMountDir)
}
// getDeviceNameFromMount asks mounter for the mount references of mountPath
// and returns the first reference that starts with pluginMountDir, expressed
// relative to pluginMountDir. When no reference matches, the base name of
// mountPath is returned instead.
//
// An error is returned when the references cannot be obtained, when there are
// none (the directory is not mounted), or when the relative path cannot be
// computed.
func getDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error) {
	mountRefs, err := mounter.GetMountRefs(mountPath)
	if err != nil {
		klog.V(4).Infof("GetMountRefs failed for mount path %q: %v", mountPath, err)
		return "", err
	}
	if len(mountRefs) == 0 {
		klog.V(4).Infof("Directory %s is not mounted", mountPath)
		return "", fmt.Errorf("directory %s is not mounted", mountPath)
	}
	for _, candidate := range mountRefs {
		if !strings.HasPrefix(candidate, pluginMountDir) {
			continue
		}
		relative, relErr := filepath.Rel(pluginMountDir, candidate)
		if relErr != nil {
			klog.Errorf("Failed to get volume id from mount %s - %v", mountPath, relErr)
			return "", relErr
		}
		return relative, nil
	}
	return path.Base(mountPath), nil
}
// MakeRShared checks that given path is on a mount with 'rshared' mount
// propagation. If not, it bind-mounts the path as rshared.
// The work is done by DoMakeRShared, using procMountInfoPath as the mount
// info file.
func (hu *HostUtil) MakeRShared(path string) error {
return DoMakeRShared(path, procMountInfoPath)
}
// GetFileType checks for file/directory/socket/block/character devices.
// It is a thin wrapper around the package-level getFileType.
func (hu *HostUtil) GetFileType(pathname string) (FileType, error) {
return getFileType(pathname)
}
// PathExists tests if the given path already exists.
// Error is returned on any other error than "file not found".
// The check is delegated to utilpath.Exists in CheckFollowSymlink mode.
func (hu *HostUtil) PathExists(pathname string) (bool, error) {
return utilpath.Exists(utilpath.CheckFollowSymlink, pathname)
}
// EvalHostSymlinks returns the path name after evaluating symlinks.
// It currently just calls filepath.EvalSymlinks.
// TODO once the nsenter implementation is removed, this method can be removed
// from the interface and filepath.EvalSymlinks used directly
func (hu *HostUtil) EvalHostSymlinks(pathname string) (string, error) {
return filepath.EvalSymlinks(pathname)
}
// FindMountInfo returns the mount info on the given path.
// The lookup is performed by findMountInfo against procMountInfoPath.
func (hu *HostUtil) FindMountInfo(path string) (mount.MountInfo, error) {
return findMountInfo(path, procMountInfoPath)
}
// isShared reports whether the mount that contains the given path has shared
// mount propagation, i.e. whether one of its optional mountinfo fields starts
// with "shared:". mountInfoPath is the mountinfo file to consult. Lookup
// failures from findMountInfo are returned unchanged.
func isShared(mount string, mountInfoPath string) (bool, error) {
	mountInfo, lookupErr := findMountInfo(mount, mountInfoPath)
	if lookupErr != nil {
		return false, lookupErr
	}

	shared := false
	for _, field := range mountInfo.OptionalFields {
		if strings.HasPrefix(field, "shared:") {
			shared = true
			break
		}
	}
	return shared, nil
}
// findMountInfo parses mountInfoPath and returns the entry for the mount on
// which path resides.
//
// Entries are scanned from last to first and the first one whose mount point
// contains path (per mount.PathWithinBase) is returned. An error is returned
// when parsing fails or no entry matches.
func findMountInfo(path, mountInfoPath string) (mount.MountInfo, error) {
	entries, parseErr := mount.ParseMountInfo(mountInfoPath)
	if parseErr != nil {
		return mount.MountInfo{}, parseErr
	}

	// Walk backwards: the first hit in reverse order is the mount where
	// path resides.
	for idx := len(entries) - 1; idx >= 0; idx-- {
		if mount.PathWithinBase(path, entries[idx].MountPoint) {
			return entries[idx], nil
		}
	}
	return mount.MountInfo{}, fmt.Errorf("cannot find mount point for %q", path)
}
// DoMakeRShared is common implementation of MakeRShared on Linux. It checks,
// using the mount info file mountInfoFilename, whether path is already on a
// shared mount. If it is not, path is bind-mounted onto itself and that mount
// is then marked recursively shared, using direct mount syscalls.
//
// Returns an error when the mount info lookup fails or when either mount
// call fails; returns nil when path is already shared or both calls succeed.
func DoMakeRShared(path string, mountInfoFilename string) error {
shared, err := isShared(path, mountInfoFilename)
if err != nil {
return err
}
if shared {
// Nothing to do: propagation is already shared.
klog.V(4).Infof("Directory %s is already on a shared mount", path)
return nil
}
klog.V(2).Infof("Bind-mounting %q with shared mount propagation", path)
// mount --bind /var/lib/kubelet /var/lib/kubelet
if err := syscall.Mount(path, path, "" /*fstype*/, syscall.MS_BIND, "" /*data*/); err != nil {
return fmt.Errorf("failed to bind-mount %s: %v", path, err)
}
// mount --make-rshared /var/lib/kubelet
if err := syscall.Mount(path, path, "" /*fstype*/, syscall.MS_SHARED|syscall.MS_REC, "" /*data*/); err != nil {
return fmt.Errorf("failed to make %s rshared: %v", path, err)
}
return nil
}
// seLinuxEnabledFunc is the signature of the "is SELinux enabled" probe that
// GetSELinux and getSELinuxMountContext accept as a parameter, so that unit
// tests can substitute their own implementation.
type seLinuxEnabledFunc func() bool
// GetSELinux is the common implementation of GetSELinuxSupport on Linux.
//
// It returns false without consulting the mount info file when
// selinuxEnabled reports false. Otherwise it looks up the mount that contains
// path in mountInfoFilename and returns true when "seclabel" appears among
// that mount's super options or mount options.
func GetSELinux(path string, mountInfoFilename string, selinuxEnabled seLinuxEnabledFunc) (bool, error) {
	// Skip mount info parsing if SELinux is disabled.
	if !selinuxEnabled() {
		return false, nil
	}

	info, err := findMountInfo(path, mountInfoFilename)
	if err != nil {
		return false, err
	}

	// "seclabel" can be both in mount options and super options; super
	// options are checked first.
	for _, options := range [][]string{info.SuperOptions, info.MountOptions} {
		for _, opt := range options {
			if opt == "seclabel" {
				return true, nil
			}
		}
	}
	return false, nil
}
// GetSELinuxSupport returns true if given path is on a mount that supports
// SELinux. It calls GetSELinux with procMountInfoPath and selinux.GetEnabled.
func (hu *HostUtil) GetSELinuxSupport(pathname string) (bool, error) {
return GetSELinux(pathname, procMountInfoPath, selinux.GetEnabled)
}
// GetOwner returns the integer ID for the user and group of the given path.
// Symlinks in pathname are resolved first; if that fails, (-1, -1, err) is
// returned. The resolved path is then handed to GetOwnerLinux.
func (hu *HostUtil) GetOwner(pathname string) (int64, int64, error) {
realpath, err := filepath.EvalSymlinks(pathname)
if err != nil {
return -1, -1, err
}
return GetOwnerLinux(realpath)
}
// GetMode returns permissions of the path.
// It is a thin wrapper around GetModeLinux.
func (hu *HostUtil) GetMode(pathname string) (os.FileMode, error) {
return GetModeLinux(pathname)
}
// GetOwnerLinux returns the numeric user and group IDs that own pathname.
// It is shared between Linux and NsEnterMounter.
//
// pathname must already be evaluated for symlinks.
//
// On failure it returns (-1, -1, err): either the error from os.Stat, or an
// error describing an unexpected underlying stat type (instead of panicking
// on the type assertion).
func GetOwnerLinux(pathname string) (int64, int64, error) {
	info, err := os.Stat(pathname)
	if err != nil {
		return -1, -1, err
	}
	stat, ok := info.Sys().(*syscall.Stat_t)
	if !ok {
		return -1, -1, fmt.Errorf("unexpected stat type %T for path %q", info.Sys(), pathname)
	}
	return int64(stat.Uid), int64(stat.Gid), nil
}
// GetModeLinux returns the file mode reported by os.Stat for pathname.
// It is shared between Linux and NsEnterMounter.
//
// When os.Stat fails, the zero mode is returned together with that error.
func GetModeLinux(pathname string) (os.FileMode, error) {
	var mode os.FileMode
	fi, statErr := os.Stat(pathname)
	if statErr == nil {
		mode = fi.Mode()
	}
	return mode, statErr
}
// GetSELinuxMountContext returns value of -o context=XYZ mount option on
// given mount point. It calls getSELinuxMountContext with procMountInfoPath
// and selinux.GetEnabled.
func (hu *HostUtil) GetSELinuxMountContext(pathname string) (string, error) {
return getSELinuxMountContext(pathname, procMountInfoPath, selinux.GetEnabled)
}
// getSELinuxMountContext is the implementation behind GetSELinuxMountContext;
// it takes the mount info file and the SELinux probe as parameters so unit
// tests can supply their own.
//
// It returns "" when selinuxEnabled reports false or when the mount that
// contains path has no "context=" super option. Otherwise it returns the
// value of the first such option with the prefix and any surrounding double
// quotes removed.
func getSELinuxMountContext(path string, mountInfoFilename string, selinuxEnabled seLinuxEnabledFunc) (string, error) {
	// Skip mount info parsing if SELinux is disabled.
	if !selinuxEnabled() {
		return "", nil
	}

	info, err := findMountInfo(path, mountInfoFilename)
	if err != nil {
		return "", err
	}

	for _, superOpt := range info.SuperOptions {
		if strings.HasPrefix(superOpt, "context=") {
			// Strip the key, then the optional double quotes.
			value := strings.TrimPrefix(superOpt, "context=")
			return strings.Trim(value, "\""), nil
		}
	}
	return "", nil
}

View File

@ -0,0 +1,109 @@
//go:build !linux && !windows
// +build !linux,!windows
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hostutil
import (
"errors"
"os"
"k8s.io/mount-utils"
)
// HostUtil is a HostUtils implementation that allows compilation on
// unsupported platforms. Every method returns errUnsupported.
type HostUtil struct{}
// NewHostUtil returns a struct that implements the HostUtils interface on
// unsupported platforms
func NewHostUtil() *HostUtil {
return &HostUtil{}
}
// errUnsupported is the error returned by every HostUtil method in this file.
var errUnsupported = errors.New("volume/util/hostutil on this platform is not supported")
// DeviceOpened always returns an error on unsupported platforms
func (hu *HostUtil) DeviceOpened(pathname string) (bool, error) {
return false, errUnsupported
}
// PathIsDevice always returns an error on unsupported platforms.
// Note that the boolean result is true; callers must check the error first.
func (hu *HostUtil) PathIsDevice(pathname string) (bool, error) {
return true, errUnsupported
}
// GetDeviceNameFromMount always returns an error on unsupported platforms
// (via the getDeviceNameFromMount stub in this file).
func (hu *HostUtil) GetDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error) {
return getDeviceNameFromMount(mounter, mountPath, pluginMountDir)
}
// MakeRShared always returns an error on unsupported platforms
func (hu *HostUtil) MakeRShared(path string) error {
return errUnsupported
}
// GetFileType always returns an error on unsupported platforms.
// The returned FileType is the placeholder value "fake".
func (hu *HostUtil) GetFileType(pathname string) (FileType, error) {
return FileType("fake"), errUnsupported
}
// MakeFile always returns an error on unsupported platforms
func (hu *HostUtil) MakeFile(pathname string) error {
return errUnsupported
}
// MakeDir always returns an error on unsupported platforms
func (hu *HostUtil) MakeDir(pathname string) error {
return errUnsupported
}
// PathExists always returns an error on unsupported platforms.
// Note that the boolean result is true; callers must check the error first.
func (hu *HostUtil) PathExists(pathname string) (bool, error) {
return true, errUnsupported
}
// EvalHostSymlinks always returns an error on unsupported platforms
func (hu *HostUtil) EvalHostSymlinks(pathname string) (string, error) {
return "", errUnsupported
}
// GetOwner always returns an error on unsupported platforms
func (hu *HostUtil) GetOwner(pathname string) (int64, int64, error) {
return -1, -1, errUnsupported
}
// GetSELinuxSupport always returns an error on unsupported platforms
func (hu *HostUtil) GetSELinuxSupport(pathname string) (bool, error) {
return false, errUnsupported
}
// GetMode always returns an error on unsupported platforms
func (hu *HostUtil) GetMode(pathname string) (os.FileMode, error) {
return 0, errUnsupported
}
// getDeviceNameFromMount is the unsupported-platform stub backing
// HostUtil.GetDeviceNameFromMount; it ignores its arguments and always
// returns errUnsupported.
func getDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error) {
return "", errUnsupported
}
// GetSELinuxMountContext always returns an error on unsupported platforms
func (hu *HostUtil) GetSELinuxMountContext(pathname string) (string, error) {
return "", errUnsupported
}

View File

@ -0,0 +1,153 @@
//go:build windows
// +build windows
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hostutil
import (
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"syscall"
"golang.org/x/sys/windows"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/util/filesystem"
"k8s.io/mount-utils"
utilpath "k8s.io/utils/path"
)
// HostUtil implements HostUtils for Windows platforms. It carries no state.
type HostUtil struct{}
// NewHostUtil returns a struct that implements HostUtils on Windows platforms.
func NewHostUtil() *HostUtil {
return &HostUtil{}
}
// GetDeviceNameFromMount given a mount point, find the device.
// It forwards its arguments unchanged to the package-level getDeviceNameFromMount.
func (hu *HostUtil) GetDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error) {
return getDeviceNameFromMount(mounter, mountPath, pluginMountDir)
}
// getDeviceNameFromMount resolves the volume name for mountPath from the mount
// references reported by mounter.GetMountRefs.
//
// pluginMountDir is normalized with mount.NormalizeWindowsPath; the first
// reference that contains the normalized directory wins, and the result is
// that reference expressed relative to it. When no reference matches, the
// last element of mountPath is returned. An error is returned when the
// references cannot be obtained, when there are none (the directory is not
// mounted), or when the relative path cannot be computed.
func getDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error) {
	mountRefs, refsErr := mounter.GetMountRefs(mountPath)
	if refsErr != nil {
		klog.V(4).Infof("GetMountRefs failed for mount path %q: %v", mountPath, refsErr)
		return "", refsErr
	}
	if len(mountRefs) == 0 {
		return "", fmt.Errorf("directory %s is not mounted", mountPath)
	}

	basemountPath := mount.NormalizeWindowsPath(pluginMountDir)
	for idx := range mountRefs {
		candidate := mountRefs[idx]
		if !strings.Contains(candidate, basemountPath) {
			continue
		}
		volumeID, relErr := filepath.Rel(mount.NormalizeWindowsPath(basemountPath), candidate)
		if relErr != nil {
			klog.Errorf("Failed to get volume id from mount %s - %v", mountPath, relErr)
			return "", relErr
		}
		return volumeID, nil
	}

	// No reference contains the plugin mount directory.
	return filepath.Base(mountPath), nil
}
// DeviceOpened determines if the device is in use elsewhere.
// This Windows implementation performs no check and always returns
// (false, nil).
func (hu *HostUtil) DeviceOpened(pathname string) (bool, error) {
return false, nil
}
// PathIsDevice determines if a path is a device.
// This Windows implementation performs no check and always returns
// (false, nil).
func (hu *HostUtil) PathIsDevice(pathname string) (bool, error) {
return false, nil
}
// MakeRShared checks that given path is on a mount with 'rshared' mount
// propagation. Empty implementation here: it always returns nil.
func (hu *HostUtil) MakeRShared(path string) error {
return nil
}
// isSystemCannotAccessErr reports whether err is an *fs.PathError whose
// underlying error is the syscall.Errno windows.ERROR_CANT_ACCESS_FILE.
// Any other error, including nil, yields false.
func isSystemCannotAccessErr(err error) bool {
	pathErr, isPathErr := err.(*fs.PathError)
	if !isPathErr {
		return false
	}
	code, isErrno := pathErr.Err.(syscall.Errno)
	if !isErrno {
		return false
	}
	return code == windows.ERROR_CANT_ACCESS_FILE
}
// GetFileType checks for sockets/block/character devices.
//
// It first asks getFileType. When that fails with errUnknownFileType or with
// a "cannot access file" system error, the path is additionally probed with
// filesystem.IsUnixDomainSocket; a positive, error-free probe yields
// FileTypeSocket. In every other case the getFileType result is returned
// unchanged.
func (hu *HostUtil) GetFileType(pathname string) (FileType, error) {
	filetype, err := getFileType(pathname)

	// os.Stat will return a 1920 error (windows.ERROR_CANT_ACCESS_FILE) if we use it on a Unix Socket
	// on Windows. Only those failures warrant the socket probe below.
	if err != errUnknownFileType && !isSystemCannotAccessErr(err) {
		return filetype, err
	}

	isSocket, errSocket := filesystem.IsUnixDomainSocket(pathname)
	if errSocket == nil && isSocket {
		return FileTypeSocket, nil
	}
	return filetype, err
}
// PathExists checks whether the path exists.
// The check is delegated to utilpath.Exists in CheckFollowSymlink mode.
func (hu *HostUtil) PathExists(pathname string) (bool, error) {
return utilpath.Exists(utilpath.CheckFollowSymlink, pathname)
}
// EvalHostSymlinks returns the path name after evaluating symlinks.
// It is a direct call to filepath.EvalSymlinks.
func (hu *HostUtil) EvalHostSymlinks(pathname string) (string, error) {
return filepath.EvalSymlinks(pathname)
}
// GetOwner returns the integer ID for the user and group of the given path.
// Note that on windows, it does not inspect the path and always returns
// (-1, -1, nil). We actually don't set Group on
// windows platform, see SetVolumeOwnership implementation.
func (hu *HostUtil) GetOwner(pathname string) (int64, int64, error) {
return -1, -1, nil
}
// GetSELinuxSupport returns a boolean indicating support for SELinux.
// Windows does not support SELinux, so this always returns (false, nil).
func (hu *HostUtil) GetSELinuxSupport(pathname string) (bool, error) {
return false, nil
}
// GetMode returns the file mode reported by os.Stat for pathname.
// When os.Stat fails, the zero mode is returned together with that error.
func (hu *HostUtil) GetMode(pathname string) (os.FileMode, error) {
	var mode os.FileMode
	fi, statErr := os.Stat(pathname)
	if statErr == nil {
		mode = fi.Mode()
	}
	return mode, statErr
}
// GetSELinuxMountContext returns value of -o context=XYZ mount option on
// given mount point. This Windows implementation always returns ("", nil).
func (hu *HostUtil) GetSELinuxMountContext(pathname string) (string, error) {
return "", nil
}

View File

@ -0,0 +1,51 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"io/ioutil"
"os"
"path/filepath"
)
// IoUtil is a mockable util for common IO operations
type IoUtil interface {
	// ReadFile returns the contents of the named file.
	ReadFile(filename string) ([]byte, error)
	// ReadDir lists the entries of the named directory.
	ReadDir(dirname string) ([]os.FileInfo, error)
	// Lstat describes the named file.
	Lstat(name string) (os.FileInfo, error)
	// EvalSymlinks returns path with symbolic links evaluated.
	EvalSymlinks(path string) (string, error)
}

// osIOHandler is the IoUtil implementation that forwards every call to the
// corresponding standard library function.
type osIOHandler struct{}

// NewIOHandler Create a new IoHandler implementation
func NewIOHandler() IoUtil {
	return new(osIOHandler)
}

// ReadFile forwards to os.ReadFile.
func (h *osIOHandler) ReadFile(filename string) ([]byte, error) {
	return os.ReadFile(filename)
}

// ReadDir forwards to ioutil.ReadDir.
func (h *osIOHandler) ReadDir(dirname string) ([]os.FileInfo, error) {
	return ioutil.ReadDir(dirname)
}

// Lstat forwards to os.Lstat.
func (h *osIOHandler) Lstat(name string) (os.FileInfo, error) {
	return os.Lstat(name)
}

// EvalSymlinks forwards to filepath.EvalSymlinks.
func (h *osIOHandler) EvalSymlinks(path string) (string, error) {
	return filepath.EvalSymlinks(path)
}

161
e2e/vendor/k8s.io/kubernetes/pkg/volume/util/metrics.go generated vendored Normal file
View File

@ -0,0 +1,161 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"fmt"
"strconv"
"time"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/util/types"
)
// Values used for the "status" label of StorageOperationMetric by
// OperationCompleteHook.
const (
statusSuccess = "success"
statusFailUnknown = "fail-unknown"
)
/*
* By default, all the following metrics are defined as falling under
* ALPHA stability level (see https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
*
* Promoting the stability level of the metric is a responsibility of the component owner, since it
* involves explicitly acknowledging support for the metric across multiple releases, in accordance with
* the metric stability policy.
*/
// StorageOperationMetric is the histogram of storage operation durations,
// labelled by volume plugin, operation name, status and migrated flag.
var StorageOperationMetric = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Name: "storage_operation_duration_seconds",
Help: "Storage operation duration",
Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600},
StabilityLevel: metrics.ALPHA,
},
[]string{"volume_plugin", "operation_name", "status", "migrated"},
)
// storageOperationEndToEndLatencyMetric is the histogram of end-to-end
// storage operation durations in seconds, labelled by plugin name and
// operation name. It is fed by RecordOperationLatencyMetric.
var storageOperationEndToEndLatencyMetric = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Name: "volume_operation_total_seconds",
Help: "Storage operation end to end duration in seconds",
Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600},
StabilityLevel: metrics.ALPHA,
},
[]string{"plugin_name", "operation_name"},
)
// csiOperationsLatencyMetric is the histogram of CSI operation durations,
// labelled by driver name, method name, gRPC status code and migrated flag.
// It is fed by RecordCSIOperationLatencyMetrics.
var csiOperationsLatencyMetric = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: "csi",
Name: "operations_seconds",
Help: "Container Storage Interface operation duration with gRPC error code status total",
Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600},
StabilityLevel: metrics.ALPHA,
},
[]string{"driver_name", "method_name", "grpc_status_code", "migrated"},
)
// init registers this package's metrics when the package is loaded.
func init() {
registerMetrics()
}
// registerMetrics registers the three histograms declared in this file with
// the legacy registry. MustRegister is used for each of them.
func registerMetrics() {
// legacyregistry is the internal k8s wrapper around the prometheus
// global registry, used specifically for metric stability enforcement
legacyregistry.MustRegister(StorageOperationMetric)
legacyregistry.MustRegister(storageOperationEndToEndLatencyMetric)
legacyregistry.MustRegister(csiOperationsLatencyMetric)
}
// OperationCompleteHook returns a hook to call when an operation is completed.
//
// The start time is captured when OperationCompleteHook is called. When the
// returned hook runs, it observes the elapsed seconds on
// StorageOperationMetric, labelled with plugin, operationName, a status and
// the migrated flag:
//   - status is statusFailUnknown when the hook's parameter carries a non-nil
//     error, statusSuccess otherwise (a nil Err pointer counts as success
//     instead of causing a nil dereference);
//   - migrated is the value pointed to by Migrated, or false when it is nil.
func OperationCompleteHook(plugin, operationName string) func(types.CompleteFuncParam) {
	requestTime := time.Now()
	opComplete := func(c types.CompleteFuncParam) {
		timeTaken := time.Since(requestTime).Seconds()
		// Create metric with operation name and plugin name
		status := statusSuccess
		if c.Err != nil && *c.Err != nil {
			// TODO: Establish well-known error codes to be able to distinguish
			// user configuration errors from system errors.
			status = statusFailUnknown
		}
		migrated := false
		if c.Migrated != nil {
			migrated = *c.Migrated
		}
		StorageOperationMetric.WithLabelValues(plugin, operationName, status, strconv.FormatBool(migrated)).Observe(timeTaken)
	}
	return opComplete
}
// FSGroupCompleteHook returns a hook to call when volume recursive permission is changed.
// It is OperationCompleteHook for the operation name
// "volume_apply_access_control", using the fully qualified plugin name
// computed by GetFullQualifiedPluginNameForVolume.
func FSGroupCompleteHook(plugin volume.VolumePlugin, spec *volume.Spec) func(types.CompleteFuncParam) {
return OperationCompleteHook(GetFullQualifiedPluginNameForVolume(plugin.GetPluginName(), spec), "volume_apply_access_control")
}
// GetFullQualifiedPluginNameForVolume returns the fully qualified plugin name
// for the given volume. For a CSI volume the driver name is appended to the
// plugin name, e.g. kubernetes.io/csi:csi-hostpath, which distinguishes
// metrics emitted for CSI volumes handled by different CSI drivers.
//
// The inline volume source is checked before the persistent volume source.
// pluginName is returned unchanged when spec is nil or carries no CSI source.
func GetFullQualifiedPluginNameForVolume(pluginName string, spec *volume.Spec) string {
	if spec == nil {
		return pluginName
	}
	switch {
	case spec.Volume != nil && spec.Volume.CSI != nil:
		return fmt.Sprintf("%s:%s", pluginName, spec.Volume.CSI.Driver)
	case spec.PersistentVolume != nil && spec.PersistentVolume.Spec.CSI != nil:
		return fmt.Sprintf("%s:%s", pluginName, spec.PersistentVolume.Spec.CSI.Driver)
	}
	return pluginName
}
// RecordOperationLatencyMetric records the end to end latency for certain operation
// into metric volume_operation_total_seconds, labelled with plugin and
// operationName. secondsTaken is the observed value.
func RecordOperationLatencyMetric(plugin, operationName string, secondsTaken float64) {
storageOperationEndToEndLatencyMetric.WithLabelValues(plugin, operationName).Observe(secondsTaken)
}
// RecordCSIOperationLatencyMetrics records the CSI operation latency and grpc status
// into csiOperationsLatencyMetric (declared with subsystem "csi" and name
// "operations_seconds").
//
// The label values are driverName, operationName, the code derived from
// operationErr by getErrorCode, and migrated. operationDuration is observed
// in seconds.
func RecordCSIOperationLatencyMetrics(driverName string,
operationName string,
operationErr error,
operationDuration time.Duration,
migrated string) {
csiOperationsLatencyMetric.WithLabelValues(driverName, operationName, getErrorCode(operationErr), migrated).Observe(operationDuration.Seconds())
}
// getErrorCode maps err to the string used for the grpc_status_code label:
// the OK code's name for a nil error, the gRPC status code's name when
// status.FromError recognizes err, and "unknown-non-grpc" otherwise.
func getErrorCode(err error) string {
	if err == nil {
		return codes.OK.String()
	}
	if grpcStatus, isGRPC := status.FromError(err); isGRPC {
		return grpcStatus.Code().String()
	}
	// This is not gRPC error. The operation must have failed before gRPC
	// method was called, otherwise we would get gRPC error.
	return "unknown-non-grpc"
}

View File

@ -0,0 +1,114 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"
v1 "k8s.io/api/core/v1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)
// getNestedMountpoints returns a list of mountpoint directories that should be created
// for the volume indicated by name.
// note: the returned list is relative to baseDir
//
// Every container visited by podutil.VisitContainers is examined. For each
// place the named volume is mounted, the other mount paths of that container
// that lie directly beneath it are collected, with the volume's own mount
// path stripped from the front. Mount paths nested beneath an
// already-collected one are skipped. An error is returned, and visiting
// stops, when the volume's cleaned mount path starts with ".." followed by
// the path separator.
//
// NOTE(review): baseDir is not referenced in this function body.
func getNestedMountpoints(name, baseDir string, pod v1.Pod) ([]string, error) {
var retval []string
// checkContainer appends the nested mountpoints found in one container to retval.
checkContainer := func(container *v1.Container) error {
var allMountPoints []string // all mount points in this container
var myMountPoints []string // mount points that match name
for _, vol := range container.VolumeMounts {
cleaned := filepath.Clean(vol.MountPath)
allMountPoints = append(allMountPoints, cleaned)
if vol.Name == name {
myMountPoints = append(myMountPoints, cleaned)
}
}
sort.Strings(allMountPoints)
parentPrefix := ".." + string(os.PathSeparator)
// Examine each place where this volume is mounted
for _, myMountPoint := range myMountPoints {
if strings.HasPrefix(myMountPoint, parentPrefix) {
// Don't let a container trick us into creating directories outside of its rootfs
return fmt.Errorf("invalid container mount point %v", myMountPoint)
}
// The trailing separator ensures only paths strictly beneath myMountPoint match.
myMPSlash := myMountPoint + string(os.PathSeparator)
// The previously found nested mountpoints.
// NOTE: We can't simply rely on sort.Strings to have all the mountpoints sorted and
// grouped. For example, the following strings are sorted in this exact order:
// /dir/nested, /dir/nested-vol, /dir/nested.vol, /dir/nested/double, /dir/nested2
// The issue is a bit worse for Windows paths, since the \'s value is higher than /'s:
// \dir\nested, \dir\nested-vol, \dir\nested.vol, \dir\nested2, \dir\nested\double
// Because of this, we should use a list of previously mounted mountpoints, rather than only one.
prevNestedMPs := []string{}
// examine each mount point to see if it's nested beneath this volume
// (but skip any that are double-nested beneath this volume)
// For example, if this volume is mounted as /dir and other volumes are mounted
// as /dir/nested and /dir/nested/other, only create /dir/nested.
for _, mp := range allMountPoints {
if !strings.HasPrefix(mp, myMPSlash) {
continue // skip -- not nested beneath myMountPoint
}
isNested := false
for _, prevNestedMP := range prevNestedMPs {
if strings.HasPrefix(mp, prevNestedMP) {
isNested = true
break
}
}
if isNested {
continue // skip -- double nested beneath myMountPoint
}
// since this mount point is nested, remember it so that we can check that following ones aren't nested beneath this one
prevNestedMPs = append(prevNestedMPs, mp+string(os.PathSeparator))
// Record the path relative to the volume's own mount point.
retval = append(retval, mp[len(myMPSlash):])
}
}
return nil
}
var retErr error
// Returning false from the visitor stops the walk at the first failing container.
podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(c *v1.Container, containerType podutil.ContainerType) bool {
retErr = checkContainer(c)
return retErr == nil
})
if retErr != nil {
return nil, retErr
}
return retval, nil
}
// MakeNestedMountpoints creates, under baseDir, the directories that serve as
// mount points for volumes mounted beneath the volume called name.
//
// The relative directory list comes from getNestedMountpoints; each entry is
// created with os.MkdirAll and mode 0755. The first failure aborts and is
// returned.
func MakeNestedMountpoints(name, baseDir string, pod v1.Pod) error {
	nested, err := getNestedMountpoints(name, baseDir, pod)
	if err != nil {
		return err
	}
	for _, rel := range nested {
		target := filepath.Join(baseDir, rel)
		if mkErr := os.MkdirAll(target, 0755); mkErr != nil {
			return fmt.Errorf("unable to create nested volume mountpoints: %v", mkErr)
		}
	}
	return nil
}

View File

@ -0,0 +1,267 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package recyclerclient
import (
"context"
"errors"
"fmt"
"sync"
"k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/watch"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
)
// RecycleEventRecorder is a func that defines how to record RecycleEvent.
// It receives the event type and the event message.
type RecycleEventRecorder func(eventtype, message string)
// RecycleVolumeByWatchingPodUntilCompletion is intended for use with volume
// Recyclers. This function will save the given Pod to the API and watch it
// until it completes, fails, or the pod's ActiveDeadlineSeconds is exceeded,
// whichever comes first. An attempt to delete a recycler pod is always
// attempted before returning.
//
// In case there is a pod with the same namespace+name already running, this
// function deletes it as it is not able to judge if it is an old recycler
// or user has forged a fake recycler to block Kubernetes from recycling.
//
// pvName - name of the PV being recycled; used to build the pod name.
// pod - the pod designed by a volume plugin to recycle the volume. pod.Name
// will be overwritten with unique name based on PV.Name.
// kubeClient - kube client for API operations.
// recorder - receives the events reported for the recycler pod.
func RecycleVolumeByWatchingPodUntilCompletion(pvName string, pod *v1.Pod, kubeClient clientset.Interface, recorder RecycleEventRecorder) error {
return internalRecycleVolumeByWatchingPodUntilCompletion(pvName, pod, newRecyclerClient(kubeClient, recorder))
}
// internalRecycleVolumeByWatchingPodUntilCompletion implements
// RecycleVolumeByWatchingPodUntilCompletion; the only difference is that
// 'recyclerClient' is a narrower pod API interface to ease testing.
//
// Steps: rename the pod to "recycler-for-<pvName>", start watching it, create
// it, wait for it to finish, then delete it. If the pod already exists it is
// deleted and an error is returned so that the caller retries later.
func internalRecycleVolumeByWatchingPodUntilCompletion(pvName string, pod *v1.Pod, recyclerClient recyclerClient) error {
// Note: this logs pod.Name as it was passed in, before it is overwritten below.
klog.V(5).Infof("creating recycler pod for volume %s\n", pod.Name)
// Generate unique name for the recycler pod - we need to get "already
// exists" error when a previous controller has already started recycling
// the volume. Here we assume that pv.Name is already unique.
pod.Name = "recycler-for-" + pvName
pod.GenerateName = ""
// Closing stopChannel on return tells the watcher to stop.
stopChannel := make(chan struct{})
defer close(stopChannel)
// The watch is started before the pod is created.
podCh, err := recyclerClient.WatchPod(pod.Name, pod.Namespace, stopChannel)
if err != nil {
klog.V(4).Infof("cannot start watcher for pod %s/%s: %v", pod.Namespace, pod.Name, err)
return err
}
// Start the pod
_, err = recyclerClient.CreatePod(pod)
if err != nil {
if apierrors.IsAlreadyExists(err) {
deleteErr := recyclerClient.DeletePod(pod.Name, pod.Namespace)
if deleteErr != nil {
return fmt.Errorf("failed to delete old recycler pod %s/%s: %s", pod.Namespace, pod.Name, deleteErr)
}
// Recycler will try again and the old pod will be hopefully deleted
// at that time.
return fmt.Errorf("old recycler pod found, will retry later")
}
return fmt.Errorf("unexpected error creating recycler pod: %+v", err)
}
err = waitForPod(pod, recyclerClient, podCh)
// In all cases delete the recycler pod and log its result.
klog.V(2).Infof("deleting recycler pod %s/%s", pod.Namespace, pod.Name)
deleteErr := recyclerClient.DeletePod(pod.Name, pod.Namespace)
if deleteErr != nil {
klog.Errorf("failed to delete recycler pod %s/%s: %v", pod.Namespace, pod.Name, deleteErr)
}
// Returning recycler error is preferred, the pod will be deleted again on
// the next retry.
if err != nil {
return fmt.Errorf("failed to recycle volume: %s", err)
}
// Recycle succeeded but we failed to delete the recycler pod. Report it,
// the controller will re-try recycling the PV again shortly.
if deleteErr != nil {
return fmt.Errorf("failed to delete recycler pod: %s", deleteErr)
}
return nil
}
// waitForPod consumes watch events from podCh until the recycler pod reaches
// a final state, forwarding newly added events about the pod to the PV via
// recyclerClient.Event.
//
// It returns nil when the pod is reported as succeeded. It returns an error
// when the pod is reported as failed (carrying the pod's status message when
// there is one), when the pod is deleted, when the watcher reports an error
// for the pod, or when podCh is closed.
func waitForPod(pod *v1.Pod, recyclerClient recyclerClient, podCh <-chan watch.Event) error {
	for event := range podCh {
		switch obj := event.Object.(type) {
		case *v1.Pod:
			// POD changed
			klog.V(4).Infof("recycler pod update received: %s %s/%s %s", event.Type, obj.Namespace, obj.Name, obj.Status.Phase)
			switch event.Type {
			case watch.Added, watch.Modified:
				switch obj.Status.Phase {
				case v1.PodSucceeded:
					// Recycle succeeded.
					return nil
				case v1.PodFailed:
					if obj.Status.Message != "" {
						return errors.New(obj.Status.Message)
					}
					return fmt.Errorf("pod failed, pod.Status.Message unknown")
				}
			case watch.Deleted:
				return fmt.Errorf("recycler pod was deleted")
			case watch.Error:
				return fmt.Errorf("recycler pod watcher failed")
			}
		case *v1.Event:
			// Event received
			klog.V(4).Infof("recycler event received: %s %s/%s %s/%s %s", event.Type, obj.Namespace, obj.Name, obj.InvolvedObject.Namespace, obj.InvolvedObject.Name, obj.Message)
			if event.Type == watch.Added {
				recyclerClient.Event(obj.Type, obj.Message)
			}
		}
	}

	// The loop only ends when the channel has been closed.
	return fmt.Errorf("recycler pod %q watch channel had been closed", pod.Name)
}
// recyclerClient abstracts access to a Pod by providing a narrower interface.
// This makes it easier to mock a client for testing.
type recyclerClient interface {
// CreatePod creates the given pod.
CreatePod(pod *v1.Pod) (*v1.Pod, error)
// GetPod fetches the pod with the given name and namespace.
GetPod(name, namespace string) (*v1.Pod, error)
// DeletePod deletes the pod with the given name and namespace.
DeletePod(name, namespace string) error
// WatchPod returns a ListWatch for watching a pod. The stopChannel is used
// to close the reflector backing the watch. The caller is responsible for
// deferring a close on the channel to stop the reflector.
WatchPod(name, namespace string, stopChannel chan struct{}) (<-chan watch.Event, error)
// Event sends an event to the volume that is being recycled.
Event(eventtype, message string)
}
// newRecyclerClient returns a recyclerClient that talks to the API server
// through client and reports events through recorder.
func newRecyclerClient(client clientset.Interface, recorder RecycleEventRecorder) recyclerClient {
	return &realRecyclerClient{
		client:   client,
		recorder: recorder,
	}
}
// realRecyclerClient is the recyclerClient implementation returned by
// newRecyclerClient.
type realRecyclerClient struct {
// client is the clientset used for pod and event API calls.
client clientset.Interface
// recorder is invoked by Event to report an event.
recorder RecycleEventRecorder
}
// CreatePod creates pod in the namespace recorded on the pod object.
func (c *realRecyclerClient) CreatePod(pod *v1.Pod) (*v1.Pod, error) {
	pods := c.client.CoreV1().Pods(pod.Namespace)
	return pods.Create(context.TODO(), pod, metav1.CreateOptions{})
}
// GetPod fetches the pod called name from namespace.
func (c *realRecyclerClient) GetPod(name, namespace string) (*v1.Pod, error) {
	pods := c.client.CoreV1().Pods(namespace)
	return pods.Get(context.TODO(), name, metav1.GetOptions{})
}
// DeletePod deletes the pod called name from namespace using default delete
// options.
func (c *realRecyclerClient) DeletePod(name, namespace string) error {
	pods := c.client.CoreV1().Pods(namespace)
	return pods.Delete(context.TODO(), name, metav1.DeleteOptions{})
}
// Event forwards the event type and message to the configured recorder.
func (c *realRecyclerClient) Event(eventtype, message string) {
c.recorder(eventtype, message)
}
// WatchPod watches a pod and the events related to it, and multiplexes both
// streams onto the single returned channel.
//
// Two API watches are opened: one on the pod selected by metadata.name, and
// one on events selected by involvedObject.name. Each is drained by its own
// goroutine until stopChannel is closed or the watch's result channel is
// closed. The returned channel is closed once both goroutines have exited.
//
// An error is returned when either field selector cannot be parsed or either
// watch cannot be established; no watch is left open in that case.
func (c *realRecyclerClient) WatchPod(name, namespace string, stopChannel chan struct{}) (<-chan watch.Event, error) {
	// Parse both selectors up front so that no watch has to be torn down when
	// a selector turns out to be invalid.
	podSelector, err := fields.ParseSelector("metadata.name=" + name)
	if err != nil {
		return nil, err
	}
	eventSelector, err := fields.ParseSelector("involvedObject.name=" + name)
	if err != nil {
		return nil, err
	}

	options := metav1.ListOptions{
		FieldSelector: podSelector.String(),
		Watch:         true,
	}
	podWatch, err := c.client.CoreV1().Pods(namespace).Watch(context.TODO(), options)
	if err != nil {
		return nil, err
	}

	eventWatch, err := c.client.CoreV1().Events(namespace).Watch(context.TODO(), metav1.ListOptions{
		FieldSelector: eventSelector.String(),
		Watch:         true,
	})
	if err != nil {
		podWatch.Stop()
		return nil, err
	}

	eventCh := make(chan watch.Event, 30)
	var wg sync.WaitGroup
	wg.Add(2)

	// Close the merged channel only after both forwarders are done, so that
	// neither of them can send on a closed channel.
	go func() {
		defer close(eventCh)
		wg.Wait()
	}()

	// forward relays everything from w to eventCh until told to stop.
	forward := func(w watch.Interface) {
		defer w.Stop()
		defer wg.Done()
		for {
			select {
			case <-stopChannel:
				return
			case ev, ok := <-w.ResultChan():
				if !ok {
					return
				}
				// The send must also observe stopChannel: once the consumer
				// has stopped reading and the buffer is full, a bare send
				// would block this goroutine forever.
				select {
				case eventCh <- ev:
				case <-stopChannel:
					return
				}
			}
		}
	}
	go forward(eventWatch)
	go forward(podWatch)

	return eventCh, nil
}

View File

@ -0,0 +1,441 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"context"
"encoding/json"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/strategicpatch"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/volume"
volumetypes "k8s.io/kubernetes/pkg/volume/util/types"
"k8s.io/mount-utils"
)
var (
// knownResizeConditions lists the PVC condition types that
// MergeResizeConditionOnPVC treats as resize conditions; condition types
// not listed here are always preserved by that function.
knownResizeConditions = map[v1.PersistentVolumeClaimConditionType]bool{
v1.PersistentVolumeClaimFileSystemResizePending: true,
v1.PersistentVolumeClaimResizing: true,
v1.PersistentVolumeClaimControllerResizeError: true,
v1.PersistentVolumeClaimNodeResizeError: true,
}
// AnnPreResizeCapacity annotation is added to a PV when expanding volume.
// Its value is the status capacity of the PVC prior to the volume expansion.
// Its value will be set by the external-resizer when it deems that filesystem resize is required after resizing volume.
// Its value will be used by pv_controller to determine pvc's status capacity when binding pvc and pv.
AnnPreResizeCapacity = "volume.alpha.kubernetes.io/pre-resize-capacity"
)
// resizeProcessStatus pairs a requested resize condition with a flag used by
// MergeResizeConditionOnPVC to track whether it has already been merged.
type resizeProcessStatus struct {
// condition is the requested resize condition.
condition v1.PersistentVolumeClaimCondition
// processed is set once a condition of the same type was found among the
// PVC's existing conditions.
processed bool
}
// UpdatePVSize updates just the PV size after cloudprovider resizing is
// successful.
//
// It sets the storage capacity on a copy of pv to newSize and sends the
// difference to the API server through PatchPV; pv itself is not modified.
// The return values are those of PatchPV.
func UpdatePVSize(
	pv *v1.PersistentVolume,
	newSize resource.Quantity,
	kubeClient clientset.Interface) (*v1.PersistentVolume, error) {
	pvClone := pv.DeepCopy()
	// Assigning into a nil map panics, so make sure the capacity list exists.
	if pvClone.Spec.Capacity == nil {
		pvClone.Spec.Capacity = v1.ResourceList{}
	}
	pvClone.Spec.Capacity[v1.ResourceStorage] = newSize
	return PatchPV(pv, pvClone, kubeClient)
}
// AddAnnPreResizeCapacity adds the volume.alpha.kubernetes.io/pre-resize-capacity
// annotation to the pv, with oldCapacity as its value.
//
// Nothing is sent to the API server when the annotation is already present.
func AddAnnPreResizeCapacity(
	pv *v1.PersistentVolume,
	oldCapacity resource.Quantity,
	kubeClient clientset.Interface) error {
	if metav1.HasAnnotation(pv.ObjectMeta, AnnPreResizeCapacity) {
		// Already annotated; keep the existing value.
		return nil
	}

	annotated := pv.DeepCopy()
	// SetMetaDataAnnotation allocates the annotation map when it is nil.
	metav1.SetMetaDataAnnotation(&annotated.ObjectMeta, AnnPreResizeCapacity, oldCapacity.String())

	_, err := PatchPV(pv, annotated, kubeClient)
	return err
}
// DeleteAnnPreResizeCapacity removes the
// volume.alpha.kubernetes.io/pre-resize-capacity annotation from the pv.
//
// Nothing is sent to the API server when the annotation is absent.
func DeleteAnnPreResizeCapacity(
	pv *v1.PersistentVolume,
	kubeClient clientset.Interface) error {
	if metav1.HasAnnotation(pv.ObjectMeta, AnnPreResizeCapacity) {
		stripped := pv.DeepCopy()
		delete(stripped.ObjectMeta.Annotations, AnnPreResizeCapacity)
		_, err := PatchPV(pv, stripped, kubeClient)
		return err
	}
	// No resize annotation on the PV; nothing to do.
	return nil
}
// PatchPV creates and executes a patch for pv.
//
// The patch is the two-way strategic merge difference between oldPV and
// newPV and is applied to the PV named oldPV.Name. On success the object
// returned by the API server is returned. On any failure oldPV is returned
// together with an error that wraps the underlying cause, so callers can
// inspect it with errors.Is / errors.As.
func PatchPV(
	oldPV *v1.PersistentVolume,
	newPV *v1.PersistentVolume,
	kubeClient clientset.Interface) (*v1.PersistentVolume, error) {
	oldData, err := json.Marshal(oldPV)
	if err != nil {
		return oldPV, fmt.Errorf("unexpected error marshaling old PV %q with error : %w", oldPV.Name, err)
	}

	newData, err := json.Marshal(newPV)
	if err != nil {
		return oldPV, fmt.Errorf("unexpected error marshaling new PV %q with error : %w", newPV.Name, err)
	}

	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, oldPV)
	if err != nil {
		return oldPV, fmt.Errorf("error Creating two way merge patch for PV %q with error : %w", oldPV.Name, err)
	}

	updatedPV, err := kubeClient.CoreV1().PersistentVolumes().Patch(context.TODO(), oldPV.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
	if err != nil {
		return oldPV, fmt.Errorf("error Patching PV %q with error : %w", oldPV.Name, err)
	}
	return updatedPV, nil
}
// MarkResizeInProgressWithResizer marks cloudprovider resizing as in progress
// and also annotates the PVC with the name of the resizer.
//
// A Resizing=True condition replaces the PVC's existing resize conditions on
// a copy of pvc, and the result is sent with PatchPVCStatus.
func MarkResizeInProgressWithResizer(
	pvc *v1.PersistentVolumeClaim,
	resizerName string,
	kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) {
	// The single condition announcing that the resize has started.
	resizing := []v1.PersistentVolumeClaimCondition{{
		Type:               v1.PersistentVolumeClaimResizing,
		Status:             v1.ConditionTrue,
		LastTransitionTime: metav1.Now(),
	}}

	updated := MergeResizeConditionOnPVC(pvc.DeepCopy(), resizing, false /* keepOldResizeConditions */)
	updated = setResizer(updated, resizerName)
	return PatchPVCStatus(pvc /*oldPVC*/, updated, kubeClient)
}
// MarkControllerReisizeInProgress marks controller-side resizing of the PVC
// as in progress.
//
// On a copy of pvc it replaces the existing resize conditions with
// Resizing=True, sets the storage resource status to
// ControllerResizeInProgress, records newSize as the allocated storage and
// sets the resizer annotation. The result is sent with PatchPVCStatus.
func MarkControllerReisizeInProgress(pvc *v1.PersistentVolumeClaim, resizerName string, newSize resource.Quantity, kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) {
	// The single condition announcing that the resize has started.
	resizing := []v1.PersistentVolumeClaimCondition{{
		Type:               v1.PersistentVolumeClaimResizing,
		Status:             v1.ConditionTrue,
		LastTransitionTime: metav1.Now(),
	}}

	updated := MergeResizeConditionOnPVC(pvc.DeepCopy(), resizing, false /* keepOldResizeConditions */)
	updated = mergeStorageResourceStatus(updated, v1.PersistentVolumeClaimControllerResizeInProgress)
	updated = mergeStorageAllocatedResources(updated, newSize)
	updated = setResizer(updated, resizerName)
	return PatchPVCStatus(pvc /*oldPVC*/, updated, kubeClient)
}
// SetClaimResizer sets the resizer annotation on a copy of the PVC and sends
// the change with PatchPVCStatus.
func SetClaimResizer(
	pvc *v1.PersistentVolumeClaim,
	resizerName string,
	kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) {
	annotated := setResizer(pvc.DeepCopy(), resizerName)
	return PatchPVCStatus(pvc /*oldPVC*/, annotated, kubeClient)
}
// setResizer makes sure the volume resizer annotation on pvc equals
// resizerName. It modifies pvc in place and returns the same pointer.
func setResizer(pvc *v1.PersistentVolumeClaim, resizerName string) *v1.PersistentVolumeClaim {
	current, found := pvc.Annotations[volumetypes.VolumeResizerKey]
	if !found || current != resizerName {
		metav1.SetMetaDataAnnotation(&pvc.ObjectMeta, volumetypes.VolumeResizerKey, resizerName)
	}
	return pvc
}
// MarkForFSResize marks file system resizing as pending.
//
// On a copy of pvc it adds a FileSystemResizePending=True condition while
// keeping the other resize conditions. When the RecoverVolumeExpansionFailure
// feature gate is enabled, it also sets the storage resource status to
// NodeResizePending. The result is sent with PatchPVCStatus.
func MarkForFSResize(
	pvc *v1.PersistentVolumeClaim,
	kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) {
	pending := []v1.PersistentVolumeClaimCondition{{
		Type:               v1.PersistentVolumeClaimFileSystemResizePending,
		Status:             v1.ConditionTrue,
		LastTransitionTime: metav1.Now(),
		Message:            "Waiting for user to (re-)start a pod to finish file system resize of volume on node.",
	}}

	updated := pvc.DeepCopy()
	if utilfeature.DefaultFeatureGate.Enabled(features.RecoverVolumeExpansionFailure) {
		updated = mergeStorageResourceStatus(updated, v1.PersistentVolumeClaimNodeResizePending)
	}
	updated = MergeResizeConditionOnPVC(updated, pending, true /* keepOldResizeConditions */)

	return PatchPVCStatus(pvc /*oldPVC*/, updated, kubeClient)
}
// MarkResizeFinished marks all resizing as done.
// It delegates to MarkFSResizeFinished with the same arguments.
func MarkResizeFinished(
pvc *v1.PersistentVolumeClaim,
newSize resource.Quantity,
kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) {
return MarkFSResizeFinished(pvc, newSize, kubeClient)
}
// MarkFSResizeFinished marks file system resizing as done.
//
// On a copy of pvc it records newSize as the status storage capacity and
// drops all resize conditions. When the RecoverVolumeExpansionFailure feature
// gate is enabled, it also removes the storage entry from the allocated
// resource statuses. The result is sent with PatchPVCStatus.
func MarkFSResizeFinished(
	pvc *v1.PersistentVolumeClaim,
	newSize resource.Quantity,
	kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) {
	newPVC := pvc.DeepCopy()

	// Assigning into a nil map panics, so make sure the capacity list exists.
	if newPVC.Status.Capacity == nil {
		newPVC.Status.Capacity = v1.ResourceList{}
	}
	newPVC.Status.Capacity[v1.ResourceStorage] = newSize

	// if RecoverVolumeExpansionFailure is enabled, we need to reset ResizeStatus back to nil
	if utilfeature.DefaultFeatureGate.Enabled(features.RecoverVolumeExpansionFailure) {
		// delete is a no-op on a nil map.
		delete(newPVC.Status.AllocatedResourceStatuses, v1.ResourceStorage)
		if len(newPVC.Status.AllocatedResourceStatuses) == 0 {
			newPVC.Status.AllocatedResourceStatuses = nil
		}
	}

	newPVC = MergeResizeConditionOnPVC(newPVC, []v1.PersistentVolumeClaimCondition{}, false /* keepOldResizeConditions */)
	return PatchPVCStatus(pvc /*oldPVC*/, newPVC, kubeClient)
}
// MarkNodeExpansionInfeasible marks a PVC for node expansion as failed. Kubelet should not retry expansion
// of volumes which are in failed state.
//
// On a copy of pvc it sets the storage resource status to NodeResizeInfeasible
// and adds a NodeResizeError=True condition whose message includes err, while
// keeping the other resize conditions. The status patch is built without a
// resource version check. On failure pvc is returned together with an error
// that wraps the underlying cause.
func MarkNodeExpansionInfeasible(pvc *v1.PersistentVolumeClaim, kubeClient clientset.Interface, err error) (*v1.PersistentVolumeClaim, error) {
	newPVC := pvc.DeepCopy()
	newPVC = mergeStorageResourceStatus(newPVC, v1.PersistentVolumeClaimNodeResizeInfeasible)
	errorCondition := v1.PersistentVolumeClaimCondition{
		Type:               v1.PersistentVolumeClaimNodeResizeError,
		Status:             v1.ConditionTrue,
		LastTransitionTime: metav1.Now(),
		Message:            fmt.Sprintf("failed to expand pvc with %v", err),
	}
	newPVC = MergeResizeConditionOnPVC(newPVC,
		[]v1.PersistentVolumeClaimCondition{errorCondition},
		true /* keepOldResizeConditions */)

	// err is reused from here on; the expansion error was already captured
	// in the condition message above.
	patchBytes, err := createPVCPatch(pvc, newPVC, false /* addResourceVersionCheck */)
	if err != nil {
		return pvc, fmt.Errorf("patchPVCStatus failed to patch PVC %q: %w", pvc.Name, err)
	}

	updatedClaim, updateErr := kubeClient.CoreV1().PersistentVolumeClaims(pvc.Namespace).
		Patch(context.TODO(), pvc.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status")
	if updateErr != nil {
		return pvc, fmt.Errorf("patchPVCStatus failed to patch PVC %q: %w", pvc.Name, updateErr)
	}
	return updatedClaim, nil
}
// MarkNodeExpansionFailedCondition records a failed node expansion on the PVC
// without changing its storage resource status.
//
// On a copy of pvc it adds a NodeResizeError=True condition whose message
// includes err, while keeping the other resize conditions. The status patch
// is built without a resource version check. On failure pvc is returned
// together with a wrapped error.
func MarkNodeExpansionFailedCondition(pvc *v1.PersistentVolumeClaim, kubeClient clientset.Interface, err error) (*v1.PersistentVolumeClaim, error) {
	failed := []v1.PersistentVolumeClaimCondition{{
		Type:               v1.PersistentVolumeClaimNodeResizeError,
		Status:             v1.ConditionTrue,
		LastTransitionTime: metav1.Now(),
		Message:            fmt.Sprintf("failed to expand pvc with %v", err),
	}}
	updated := MergeResizeConditionOnPVC(pvc.DeepCopy(), failed, true /* keepOldResizeConditions */)

	patchBytes, patchErr := createPVCPatch(pvc, updated, false /* addResourceVersionCheck */)
	if patchErr != nil {
		return pvc, fmt.Errorf("patchPVCStatus failed to patch PVC %q: %w", pvc.Name, patchErr)
	}

	claims := kubeClient.CoreV1().PersistentVolumeClaims(pvc.Namespace)
	updatedClaim, updateErr := claims.Patch(context.TODO(), pvc.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status")
	if updateErr != nil {
		return pvc, fmt.Errorf("patchPVCStatus failed to patch PVC %q: %w", pvc.Name, updateErr)
	}
	return updatedClaim, nil
}
// MarkNodeExpansionInProgress marks pvc expansion as in progress on the node
// by setting the storage resource status to NodeResizeInProgress on a copy of
// pvc and sending it with PatchPVCStatus.
func MarkNodeExpansionInProgress(pvc *v1.PersistentVolumeClaim, kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) {
	inProgress := mergeStorageResourceStatus(pvc.DeepCopy(), v1.PersistentVolumeClaimNodeResizeInProgress)
	return PatchPVCStatus(pvc /* oldPVC */, inProgress, kubeClient)
}
// PatchPVCStatus updates PVC status using PATCH verb
// Don't use Update because this can be called from kubelet and if kubelet has an older client its
// Updates will overwrite new fields. And to avoid writing to a stale object, add ResourceVersion
// to the patch so that Patch will fail if the patch's RV != actual up-to-date RV like Update would
//
// On success the object returned by the API server is returned. On failure
// oldPVC is returned together with an error that wraps the underlying cause,
// so callers can inspect it with errors.Is / errors.As.
func PatchPVCStatus(
	oldPVC *v1.PersistentVolumeClaim,
	newPVC *v1.PersistentVolumeClaim,
	kubeClient clientset.Interface) (*v1.PersistentVolumeClaim, error) {
	patchBytes, err := createPVCPatch(oldPVC, newPVC, true /* addResourceVersionCheck */)
	if err != nil {
		return oldPVC, fmt.Errorf("patchPVCStatus failed to patch PVC %q: %w", oldPVC.Name, err)
	}

	updatedClaim, updateErr := kubeClient.CoreV1().PersistentVolumeClaims(oldPVC.Namespace).
		Patch(context.TODO(), oldPVC.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status")
	if updateErr != nil {
		return oldPVC, fmt.Errorf("patchPVCStatus failed to patch PVC %q: %w", oldPVC.Name, updateErr)
	}
	return updatedClaim, nil
}
// createPVCPatch returns the two-way strategic merge patch that turns oldPVC
// into newPVC.
//
// When addResourceVersionCheck is true, oldPVC's resource version is added to
// the patch. Returned errors wrap the underlying cause.
func createPVCPatch(
	oldPVC *v1.PersistentVolumeClaim,
	newPVC *v1.PersistentVolumeClaim, addResourceVersionCheck bool) ([]byte, error) {
	oldData, err := json.Marshal(oldPVC)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal old data: %w", err)
	}

	newData, err := json.Marshal(newPVC)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal new data: %w", err)
	}

	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, oldPVC)
	if err != nil {
		return nil, fmt.Errorf("failed to create 2 way merge patch: %w", err)
	}

	if addResourceVersionCheck {
		patchBytes, err = addResourceVersion(patchBytes, oldPVC.ResourceVersion)
		if err != nil {
			return nil, fmt.Errorf("failed to add resource version: %w", err)
		}
	}

	return patchBytes, nil
}
// addResourceVersion returns a copy of the JSON patch in patchBytes with the
// object's resource version set to resourceVersion.
//
// The patch is decoded into a generic map, the resource version is set
// through a metadata accessor, and the map is encoded again. Returned errors
// wrap the underlying cause.
func addResourceVersion(patchBytes []byte, resourceVersion string) ([]byte, error) {
	var patchMap map[string]interface{}
	err := json.Unmarshal(patchBytes, &patchMap)
	if err != nil {
		return nil, fmt.Errorf("error unmarshalling patch: %w", err)
	}
	u := unstructured.Unstructured{Object: patchMap}
	a, err := meta.Accessor(&u)
	if err != nil {
		return nil, fmt.Errorf("error creating accessor: %w", err)
	}
	a.SetResourceVersion(resourceVersion)
	versionBytes, err := json.Marshal(patchMap)
	if err != nil {
		return nil, fmt.Errorf("error marshalling json patch: %w", err)
	}
	return versionBytes, nil
}
// MergeResizeConditionOnPVC updates pvc with requested resize conditions
// leaving other conditions untouched.
//
// Existing conditions are handled as follows:
//   - conditions whose type is not a known resize condition are kept;
//   - a resize condition whose type is also requested is replaced by the
//     requested one when the Status differs, and kept as is otherwise;
//   - any other resize condition is kept only when keepOldResizeConditions
//     is true.
//
// Requested conditions with no existing counterpart are appended afterwards,
// in the order they appear in resizeConditions, so the result is
// deterministic. pvc is modified in place and returned.
func MergeResizeConditionOnPVC(
	pvc *v1.PersistentVolumeClaim,
	resizeConditions []v1.PersistentVolumeClaimCondition, keepOldResizeConditions bool) *v1.PersistentVolumeClaim {
	resizeConditionMap := make(map[v1.PersistentVolumeClaimConditionType]*resizeProcessStatus, len(resizeConditions))
	for _, condition := range resizeConditions {
		resizeConditionMap[condition.Type] = &resizeProcessStatus{condition, false}
	}

	oldConditions := pvc.Status.Conditions
	newConditions := []v1.PersistentVolumeClaimCondition{}
	for _, condition := range oldConditions {
		// If Condition is of not resize type, we keep it.
		if _, ok := knownResizeConditions[condition.Type]; !ok {
			newConditions = append(newConditions, condition)
			continue
		}

		if newCondition, ok := resizeConditionMap[condition.Type]; ok {
			if newCondition.condition.Status != condition.Status {
				newConditions = append(newConditions, newCondition.condition)
			} else {
				newConditions = append(newConditions, condition)
			}
			newCondition.processed = true
		} else if keepOldResizeConditions {
			// if keepOldResizeConditions is true, we keep the old resize conditions that were present in the
			// existing pvc.Status.Conditions field.
			newConditions = append(newConditions, condition)
		}
	}

	// Append all unprocessed conditions. Walk the caller's slice rather than
	// the map: map iteration order is random, which would make the resulting
	// condition order differ between otherwise identical calls.
	for _, condition := range resizeConditions {
		status := resizeConditionMap[condition.Type]
		if status.processed {
			continue
		}
		newConditions = append(newConditions, status.condition)
		// Guard against the same type being requested more than once.
		status.processed = true
	}
	pvc.Status.Conditions = newConditions
	return pvc
}
// mergeStorageResourceStatus sets the allocated resource status of the
// storage resource on pvc to status, allocating the status map when it is
// nil. pvc is modified in place and returned.
func mergeStorageResourceStatus(pvc *v1.PersistentVolumeClaim, status v1.ClaimResourceStatus) *v1.PersistentVolumeClaim {
	if pvc.Status.AllocatedResourceStatuses == nil {
		pvc.Status.AllocatedResourceStatuses = make(map[v1.ResourceName]v1.ClaimResourceStatus, 1)
	}
	pvc.Status.AllocatedResourceStatuses[v1.ResourceStorage] = status
	return pvc
}
// mergeStorageAllocatedResources sets the allocated storage quantity on pvc
// to size, allocating the resource map when it is nil. pvc is modified in
// place and returned.
func mergeStorageAllocatedResources(pvc *v1.PersistentVolumeClaim, size resource.Quantity) *v1.PersistentVolumeClaim {
	if pvc.Status.AllocatedResources == nil {
		pvc.Status.AllocatedResources = make(map[v1.ResourceName]resource.Quantity, 1)
	}
	pvc.Status.AllocatedResources[v1.ResourceStorage] = size
	return pvc
}
// GenericResizeFS calls the generic filesystem resizer for plugins that don't
// have any special filesystem resize requirements. The resizer is built from
// the exec interface the host provides for pluginName, and its Resize result
// for devicePath and deviceMountPath is returned unchanged.
func GenericResizeFS(host volume.VolumeHost, pluginName, devicePath, deviceMountPath string) (bool, error) {
	exec := host.GetExec(pluginName)
	return mount.NewResizeFs(exec).Resize(devicePath, deviceMountPath)
}

313
e2e/vendor/k8s.io/kubernetes/pkg/volume/util/selinux.go generated vendored Normal file
View File

@ -0,0 +1,313 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"errors"
"fmt"
"strings"
"github.com/opencontainers/selinux/go-selinux"
"github.com/opencontainers/selinux/go-selinux/label"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/volume"
)
// SELinuxLabelTranslator translates v1.SELinuxOptions of a process to SELinux file label.
// This file provides a real implementation (NewSELinuxLabelTranslator) and a
// fake one for unit tests (NewFakeSELinuxLabelTranslator).
type SELinuxLabelTranslator interface {
// SELinuxOptionsToFileLabel returns SELinux file label for given SELinuxOptions
// of a container process.
// When Role, User or Type are empty, they're read from the system defaults.
// It returns "" and no error on platforms that do not have SELinux enabled
// or don't support SELinux at all.
SELinuxOptionsToFileLabel(opts *v1.SELinuxOptions) (string, error)
// SELinuxEnabled returns true when the OS has enabled SELinux support.
SELinuxEnabled() bool
}
// translator is the real implementation of SELinuxLabelTranslator.
// On Linux with SELinux enabled it translates. Otherwise it always returns an empty string and no error.
type translator struct{}

// Compile-time check that *translator satisfies SELinuxLabelTranslator.
var _ SELinuxLabelTranslator = (*translator)(nil)

// NewSELinuxLabelTranslator returns new SELinuxLabelTranslator for the platform.
func NewSELinuxLabelTranslator() SELinuxLabelTranslator {
	return new(translator)
}
// SELinuxOptionsToFileLabel returns SELinux file label for given SELinuxOptions
// of a container process.
// When Role, User or Type are empty, they're read from the system defaults.
// It returns "" and no error on platforms that do not have SELinux enabled
// or don't support SELinux at all.
// It also returns "" and no error when opts is nil or has no field set.
func (l *translator) SELinuxOptionsToFileLabel(opts *v1.SELinuxOptions) (string, error) {
	if opts == nil {
		return "", nil
	}

	labelArgs := contextOptions(opts)
	if len(labelArgs) == 0 {
		return "", nil
	}

	processLabel, fileLabel, err := label.InitLabels(labelArgs)
	if err == nil {
		// InitLabels() may allocate a new unique SELinux label in kubelet memory. The label is *not* allocated
		// in the container runtime. Clear it to avoid memory problems.
		// ReleaseLabel on non-allocated label is NOOP.
		selinux.ReleaseLabel(processLabel)
		return fileLabel, nil
	}

	// In theory, this should be unreachable. InitLabels can fail only when args contain an unknown option,
	// and all options returned by contextOptions are known.
	return "", &SELinuxLabelTranslationError{msg: err.Error()}
}
// contextOptions converts SELinuxOptions to the []string accepted by
// label.InitLabels.
//
// Each non-empty field becomes one "<key>:<value>" entry, in the order user,
// role, type, level. It returns nil for nil opts and an empty, non-nil slice
// when no field is set.
func contextOptions(opts *v1.SELinuxOptions) []string {
	if opts == nil {
		return nil
	}
	// Up to four entries can be appended, one per field.
	args := make([]string, 0, 4)
	if opts.User != "" {
		args = append(args, "user:"+opts.User)
	}
	if opts.Role != "" {
		args = append(args, "role:"+opts.Role)
	}
	if opts.Type != "" {
		args = append(args, "type:"+opts.Type)
	}
	if opts.Level != "" {
		args = append(args, "level:"+opts.Level)
	}
	return args
}
// SELinuxEnabled returns the result of selinux.GetEnabled.
func (l *translator) SELinuxEnabled() bool {
return selinux.GetEnabled()
}
// fakeTranslator is a fake implementation of SELinuxLabelTranslator for unit
// tests.
type fakeTranslator struct{}

// Compile-time check that *fakeTranslator satisfies SELinuxLabelTranslator.
var _ SELinuxLabelTranslator = (*fakeTranslator)(nil)

// NewFakeSELinuxLabelTranslator returns a fake translator for unit tests.
// It imitates a real translator on platforms that do not have SELinux enabled
// or don't support SELinux at all.
func NewFakeSELinuxLabelTranslator() SELinuxLabelTranslator {
	return new(fakeTranslator)
}
// SELinuxOptionsToFileLabel returns SELinux file label for given options.
// Empty fields are filled with fixed defaults and the result has the form
// "user:role:type:level". It returns "" for nil opts and never returns an
// error.
func (l *fakeTranslator) SELinuxOptionsToFileLabel(opts *v1.SELinuxOptions) (string, error) {
	if opts == nil {
		return "", nil
	}

	user, role, fileType, level := opts.User, opts.Role, opts.Type, opts.Level

	// Fill empty values from "system defaults" (taken from Fedora Linux).
	if user == "" {
		user = "system_u"
	}
	if role == "" {
		role = "object_r"
	}

	// opts is context of the *process* to run in a container. Translate
	// process type "container_t" to file label type "container_file_t".
	// (The rest of the context is the same for processes and files).
	switch fileType {
	case "", "container_t":
		fileType = "container_file_t"
	}

	if level == "" {
		// If empty, level is allocated randomly.
		level = "s0:c998,c999"
	}

	return fmt.Sprintf("%s:%s:%s:%s", user, role, fileType, level), nil
}
// SELinuxEnabled always returns true for the fake translator.
func (l *fakeTranslator) SELinuxEnabled() bool {
return true
}
// SELinuxLabelTranslationError is the error returned when SELinux options
// cannot be translated to a file label.
type SELinuxLabelTranslationError struct {
	// msg is the text returned by Error.
	msg string
}

// Error implements the error interface.
func (e *SELinuxLabelTranslationError) Error() string {
	return e.msg
}

// IsSELinuxLabelTranslationError reports whether err, or any error it wraps,
// is a *SELinuxLabelTranslationError.
func IsSELinuxLabelTranslationError(err error) bool {
	var target *SELinuxLabelTranslationError
	matched := errors.As(err, &target)
	return matched
}
// SupportsSELinuxContextMount checks if the given volumeSpec supports mounting
// with mount -o context. The answer comes from the plugin that
// volumePluginMgr finds for volumeSpec; when no plugin is found it returns
// false and no error.
func SupportsSELinuxContextMount(volumeSpec *volume.Spec, volumePluginMgr *volume.VolumePluginMgr) (bool, error) {
	// The lookup error is intentionally dropped: a missing plugin is
	// reported as "not supported".
	plugin, _ := volumePluginMgr.FindPluginBySpec(volumeSpec)
	if plugin == nil {
		return false, nil
	}
	return plugin.SupportsSELinuxContextMount(volumeSpec)
}
// VolumeSupportsSELinuxMount returns true if given volume access mode can support mount with SELinux mount options.
//
// It requires the SELinuxMountReadWriteOncePod feature gate and a
// PersistentVolume in volumeSpec. With the SELinuxMount gate enabled every
// such volume qualifies; otherwise only volumes whose sole access mode is
// ReadWriteOncePod do.
func VolumeSupportsSELinuxMount(volumeSpec *volume.Spec) bool {
	gate := utilfeature.DefaultFeatureGate
	if !gate.Enabled(features.SELinuxMountReadWriteOncePod) || volumeSpec.PersistentVolume == nil {
		return false
	}
	if gate.Enabled(features.SELinuxMount) {
		return true
	}

	// Only SELinuxMountReadWriteOncePod feature is enabled:
	// RWOP must be the only access mode of the volume.
	modes := volumeSpec.PersistentVolume.Spec.AccessModes
	return len(modes) == 1 && v1helper.ContainsAccessMode(modes, v1.ReadWriteOncePod)
}
// MultipleSELinuxLabelsError tells that one volume in a pod is mounted in multiple containers and each has a different SELinux label.
type MultipleSELinuxLabelsError struct {
	// labels holds the conflicting SELinux labels.
	labels []string
}

// Error implements the error interface; the labels are joined with commas.
func (e *MultipleSELinuxLabelsError) Error() string {
	joined := strings.Join(e.labels, ",")
	return fmt.Sprintf("multiple SELinux labels found: %s", joined)
}

// Labels returns the conflicting SELinux labels.
func (e *MultipleSELinuxLabelsError) Labels() []string {
	return e.labels
}

// IsMultipleSELinuxLabelsError reports whether err, or any error it wraps, is
// a *MultipleSELinuxLabelsError.
func IsMultipleSELinuxLabelsError(err error) bool {
	var target *MultipleSELinuxLabelsError
	matched := errors.As(err, &target)
	return matched
}
// AddSELinuxMountOption adds -o context="XYZ" mount option to a given list.
// The list is returned unchanged when the SELinuxMountReadWriteOncePod
// feature gate is disabled.
func AddSELinuxMountOption(options []string, seLinuxContext string) []string {
	if utilfeature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
		// Use double quotes to support a comma "," in the SELinux context string.
		// For example: dirsync,context="system_u:object_r:container_file_t:s0:c15,c25",noatime
		contextOption := fmt.Sprintf("context=%q", seLinuxContext)
		return append(options, contextOption)
	}
	return options
}
// SELinuxLabelInfo contains information about SELinux labels that should be used to mount a volume for a Pod.
type SELinuxLabelInfo struct {
// SELinuxMountLabel is the SELinux label that should be used to mount the volume.
// It is set when the volume plugin supports SELinuxMount and the Pod did not opt out via SELinuxChangePolicy.
// Empty string otherwise.
SELinuxMountLabel string
// SELinuxProcessLabel is the SELinux label that the container runtime will use for the Pod.
// It is set regardless of whether the volume plugin supports SELinuxMount or the Pod opted out via SELinuxChangePolicy.
SELinuxProcessLabel string
// PluginSupportsSELinuxContextMount is true if the volume plugin supports SELinux mount.
PluginSupportsSELinuxContextMount bool
}
// GetMountSELinuxLabel returns SELinux labels that should be used to mount the given volume volumeSpec and podSecurityContext.
// It expects effectiveSELinuxContainerLabels as returned by volumeutil.GetPodVolumeNames, i.e. with all SELinuxOptions
// from all containers that use the volume in the pod, potentially expanded with PodSecurityContext.SELinuxOptions,
// if container's SELinuxOptions are nil.
// It does not evaluate the volume access mode! It's up to the caller to check SELinuxMount feature gate,
// it may need to bump different metrics based on feature gates / access modes / label anyway.
//
// An empty SELinuxLabelInfo is returned when the SELinuxMountReadWriteOncePod
// feature gate is disabled or the translator reports SELinux as disabled.
// A *MultipleSELinuxLabelsError is returned when the containers translate to
// more than one distinct label; its labels are sorted so that the error
// message is stable across calls.
func GetMountSELinuxLabel(volumeSpec *volume.Spec, effectiveSELinuxContainerLabels []*v1.SELinuxOptions, podSecurityContext *v1.PodSecurityContext, volumePluginMgr *volume.VolumePluginMgr, seLinuxTranslator SELinuxLabelTranslator) (SELinuxLabelInfo, error) {
	info := SELinuxLabelInfo{}
	if !utilfeature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
		return info, nil
	}

	if !seLinuxTranslator.SELinuxEnabled() {
		return info, nil
	}

	pluginSupportsSELinuxContextMount, err := SupportsSELinuxContextMount(volumeSpec, volumePluginMgr)
	if err != nil {
		return info, err
	}

	info.PluginSupportsSELinuxContextMount = pluginSupportsSELinuxContextMount

	// Collect all SELinux options from all containers that use this volume.
	// A set will squash any duplicates.
	labels := sets.New[string]()
	for _, containerLabel := range effectiveSELinuxContainerLabels {
		lbl, err := seLinuxTranslator.SELinuxOptionsToFileLabel(containerLabel)
		if err != nil {
			fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %w", containerLabel, err)
			return info, fullErr
		}
		labels.Insert(lbl)
	}

	// Ensure that all containers use the same SELinux label.
	if labels.Len() > 1 {
		// This volume is used with more than one SELinux label in the pod.
		// Report the labels sorted: set iteration order is random, and an
		// unsorted list would make the error text vary between calls.
		return info, &MultipleSELinuxLabelsError{labels: sets.List(labels)}
	}
	if labels.Len() == 0 {
		return info, nil
	}

	lbl, _ := labels.PopAny()
	info.SELinuxProcessLabel = lbl
	info.SELinuxMountLabel = lbl

	if utilfeature.DefaultFeatureGate.Enabled(features.SELinuxChangePolicy) &&
		podSecurityContext != nil &&
		podSecurityContext.SELinuxChangePolicy != nil &&
		*podSecurityContext.SELinuxChangePolicy == v1.SELinuxChangePolicyRecursive {
		// The pod has opted into recursive SELinux label changes. Do not mount with -o context.
		info.SELinuxMountLabel = ""
	}

	if !pluginSupportsSELinuxContextMount {
		// The volume plugin does not support SELinux mount. Do not mount with -o context.
		info.SELinuxMountLabel = ""
	}

	return info, nil
}

View File

@ -0,0 +1,85 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"sort"
storagev1 "k8s.io/api/storage/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
storagev1listers "k8s.io/client-go/listers/storage/v1"
"k8s.io/klog/v2"
)
const (
// IsDefaultStorageClassAnnotation represents a StorageClass annotation that
// marks a class as the default StorageClass
IsDefaultStorageClassAnnotation = "storageclass.kubernetes.io/is-default-class"
// BetaIsDefaultStorageClassAnnotation is the beta version of IsDefaultStorageClassAnnotation.
// TODO: remove Beta when no longer used
BetaIsDefaultStorageClassAnnotation = "storageclass.beta.kubernetes.io/is-default-class"
)
// GetDefaultClass returns the default StorageClass from the store, or nil.
//
// When several classes are marked as default, the one with the newest
// creation timestamp wins; ties are broken by the alphabetically smallest
// name. An error is returned only when listing the classes fails.
func GetDefaultClass(lister storagev1listers.StorageClassLister) (*storagev1.StorageClass, error) {
	all, err := lister.List(labels.Everything())
	if err != nil {
		return nil, err
	}

	var candidates []*storagev1.StorageClass
	for _, class := range all {
		if !IsDefaultAnnotation(class.ObjectMeta) {
			continue
		}
		candidates = append(candidates, class)
		klog.V(4).Infof("GetDefaultClass added: %s", class.Name)
	}

	if len(candidates) == 0 {
		return nil, nil
	}

	// Primary sort by creation timestamp, newest first
	// Secondary sort by class name, ascending order
	sort.Slice(candidates, func(i, j int) bool {
		left, right := candidates[i], candidates[j]
		leftNanos := left.CreationTimestamp.UnixNano()
		rightNanos := right.CreationTimestamp.UnixNano()
		if leftNanos != rightNanos {
			return leftNanos > rightNanos
		}
		return left.Name < right.Name
	})

	chosen := candidates[0]
	if len(candidates) > 1 {
		klog.V(4).Infof("%d default StorageClasses were found, choosing: %s", len(candidates), chosen.Name)
	}

	return chosen, nil
}
// IsDefaultAnnotation reports whether the default storage class annotation,
// or its beta variant, is set to "true" on obj.
// TODO: remove Beta when no longer needed
func IsDefaultAnnotation(obj metav1.ObjectMeta) bool {
	for _, key := range [...]string{IsDefaultStorageClassAnnotation, BetaIsDefaultStorageClassAnnotation} {
		if obj.Annotations[key] == "true" {
			return true
		}
	}
	return false
}

View File

@ -0,0 +1,12 @@
# See the OWNERS docs at https://go.k8s.io/owners
reviewers:
- jingxu97
- saad-ali
- jsafrane
- msau42
- andyzhangx
approvers:
- jingxu97
- saad-ali
- jsafrane

View File

@ -0,0 +1,92 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package subpath
import "os"
// Interface defines the set of methods all subpathers must implement
type Interface interface {
	// CleanSubPaths removes any bind-mounts created by PrepareSafeSubpath in given
	// pod volume directory.
	CleanSubPaths(podDir string, volumeName string) error

	// PrepareSafeSubpath does everything that's necessary to prepare a subPath
	// that's 1) inside given volumePath and 2) immutable after this call.
	//
	// newHostPath - location of prepared subPath. It should be used instead of
	// the original host path when running the container.
	// cleanupAction - action to run when the container is running or it failed to start.
	//
	// CleanupAction must be called immediately after the container with given
	// subpath starts. On the other hand, Interface.CleanSubPaths must be called
	// when the pod finishes.
	PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error)

	// SafeMakeDir creates subdir within given base. It makes sure that the
	// created directory does not escape given base directory mis-using
	// symlinks. Note that the function makes sure that it creates the directory
	// somewhere under the base, nothing else. E.g. if the directory already
	// exists, it may exist outside of the base due to symlinks.
	// This method should be used if the directory to create is inside volume
	// that's under user control. User must not be able to use symlinks to
	// escape the volume to create directories somewhere else.
	SafeMakeDir(subdir string, base string, perm os.FileMode) error
}
// Subpath defines the attributes of a subpath: the volume and container it
// belongs to and the host paths involved.
type Subpath struct {
// VolumeMountIndex is the index of the VolumeMount for this container
VolumeMountIndex int
// Path is the full path to the subpath directory on the host
Path string
// VolumeName is the name of the volume; it must be a valid directory name.
VolumeName string
// VolumePath is the full path to the volume
VolumePath string
// PodDir is the path to the pod's directory, including pod UID
PodDir string
// ContainerName is the name of the container
ContainerName string
}
// Compile-time assertions that every subpath implementation satisfies Interface.
var (
	_ Interface = &subpath{}
	_ Interface = &FakeSubpath{}
)
// FakeSubpath is an Interface implementation for tests; none of its methods
// touch the filesystem.
type FakeSubpath struct{}

// PrepareSafeSubpath is the fake PrepareSafeSubpath: it hands subPath.Path back
// unchanged, with a nil cleanup action and a nil error.
func (f *FakeSubpath) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
	newHostPath = subPath.Path
	return newHostPath, cleanupAction, err
}

// CleanSubPaths is the fake CleanSubPaths. It does nothing and reports success.
func (f *FakeSubpath) CleanSubPaths(podDir string, volumeName string) error {
	return nil
}

// SafeMakeDir is the fake SafeMakeDir. It does nothing and reports success.
func (f *FakeSubpath) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
	return nil
}

View File

@ -0,0 +1,609 @@
//go:build linux
// +build linux
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package subpath
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"golang.org/x/sys/unix"
"k8s.io/klog/v2"
"k8s.io/mount-utils"
)
const (
// containerSubPathDirectoryName is the directory name, joined under the pod
// directory, that holds the subpath bind-mount targets.
// TODO: pass in directory using kubelet_getters instead
containerSubPathDirectoryName = "volume-subpaths"
// nofollowFlags are the syscall.Openat flags used to traverse directories not following symlinks
nofollowFlags = unix.O_RDONLY | unix.O_NOFOLLOW
// openFDFlags are the flags for getting file descriptor without following the symlink
openFDFlags = unix.O_NOFOLLOW | unix.O_PATH
)
// subpath is the Linux implementation of Interface. It hands its mounter to
// the helper functions that perform the actual mount work.
type subpath struct {
// mounter is passed to the helpers that bind-mount and clean up subpaths.
mounter mount.Interface
}
// New returns a subpath.Interface for the current system, backed by the given
// mounter.
func New(mounter mount.Interface) Interface {
return &subpath{
mounter: mounter,
}
}
// CleanSubPaths implements Interface.CleanSubPaths by delegating to
// doCleanSubPaths with this subpath's mounter.
func (sp *subpath) CleanSubPaths(podDir string, volumeName string) error {
return doCleanSubPaths(sp.mounter, podDir, volumeName)
}
// SafeMakeDir resolves the symlinks in base and then asks doSafeMakeDir to
// create subdir underneath the resolved base with the given permissions. An
// error is returned when base cannot be resolved or the directory cannot be
// created.
func (sp *subpath) SafeMakeDir(subdir string, base string, perm os.FileMode) error {
	resolvedBase, err := filepath.EvalSymlinks(base)
	if err != nil {
		return fmt.Errorf("error resolving symlinks in %s: %s", base, err)
	}
	return doSafeMakeDir(filepath.Join(resolvedBase, subdir), resolvedBase, perm)
}
// PrepareSafeSubpath bind-mounts the given subpath through doBindSubPath and
// returns the resulting host path together with any error from the bind.
func (sp *subpath) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
	newHostPath, err = doBindSubPath(sp.mounter, subPath)
	// The cleanup action is always nil: nothing has to happen when the
	// container starts. The bind-mount is removed by CleanSubPaths once the
	// container stops.
	return newHostPath, nil, err
}
// safeOpenSubPath verifies that subpath.Path lies within subpath.VolumePath
// and opens it with doSafeOpen. It returns the opened file descriptor, or -1
// and an error. The mounter argument is not used by this function.
// This implementation is shared between Linux and NsEnter
func safeOpenSubPath(mounter mount.Interface, subpath Subpath) (int, error) {
	target, volume := subpath.Path, subpath.VolumePath
	if !mount.PathWithinBase(target, volume) {
		return -1, fmt.Errorf("subpath %q not within volume path %q", target, volume)
	}

	fd, err := doSafeOpen(target, volume)
	if err != nil {
		return -1, fmt.Errorf("error opening subpath %v: %v", target, err)
	}
	return fd, nil
}
// prepareSubpathTarget creates the target of the subpath bind-mount. The
// returned bool is true when the target already exists and the expected file is
// already mounted there; the returned string is the bind target path.
// Given Subpath must have all paths with already resolved symlinks and with
// paths relevant to kubelet (when it runs in a container).
// This function is called also by NsEnterMounter. It works because
// /var/lib/kubelet is mounted from the host into the container with Kubelet as
// /var/lib/kubelet too.
func prepareSubpathTarget(mounter mount.Interface, subpath Subpath) (bool, string, error) {
	target := getSubpathBindTarget(subpath)

	// Early check for an already bind-mounted subpath.
	notMount, err := mount.IsNotMountPoint(mounter, target)
	switch {
	case err == nil:
		// Mount state is known; nothing to adjust.
	case os.IsNotExist(err):
		// The file/directory will be created below if it does not exist yet.
		notMount = true
	default:
		return false, "", fmt.Errorf("error checking path %s for mount: %s", target, err)
	}

	if !notMount {
		// Something is mounted here; find out whether it is the expected file.
		same, err := checkSubPathFileEqual(subpath, target)
		if err != nil {
			return false, "", fmt.Errorf("error checking subpath mount info for %s: %s", target, err)
		}
		if same {
			klog.V(5).Infof("Skipping bind-mounting subpath %s: already mounted", target)
			return true, target, nil
		}
		// Mounted, but not what we want: unmount it and fall through.
		if err = mounter.Unmount(target); err != nil {
			return false, "", fmt.Errorf("error ummounting %s: %s", target, err)
		}
	}

	// The bind target is in /var/lib/kubelet and thus reachable without any
	// translation even to containerized kubelet.
	parent := filepath.Dir(target)
	if err = os.MkdirAll(parent, 0750); err != nil && !os.IsExist(err) {
		return false, "", fmt.Errorf("error creating directory %s: %s", parent, err)
	}

	info, err := os.Lstat(subpath.Path)
	if err != nil {
		return false, "", fmt.Errorf("lstat %s failed: %s", subpath.Path, err)
	}

	if info.IsDir() {
		if err = os.Mkdir(target, 0750); err != nil && !os.IsExist(err) {
			return false, "", fmt.Errorf("error creating directory %s: %s", target, err)
		}
		return false, target, nil
	}

	// Equivalent of "/bin/touch <target>".
	// A file is enough for all possible targets (symlink, device, pipe,
	// socket, ...), bind-mounting them into a file correctly changes type
	// of the target file.
	if err = ioutil.WriteFile(target, []byte{}, 0640); err != nil {
		return false, "", fmt.Errorf("error creating file %s: %s", target, err)
	}
	return false, target, nil
}
// checkSubPathFileEqual reports whether subpath.Path and bindMountTarget refer
// to the same file according to os.SameFile. Both paths are inspected with
// os.Lstat; a failure of either call is returned as an error.
func checkSubPathFileEqual(subpath Subpath, bindMountTarget string) (bool, error) {
	sourceInfo, err := os.Lstat(subpath.Path)
	if err != nil {
		return false, fmt.Errorf("stat %s failed: %s", subpath.Path, err)
	}

	targetInfo, err := os.Lstat(bindMountTarget)
	if err != nil {
		return false, fmt.Errorf("lstat %s failed: %s", bindMountTarget, err)
	}

	return os.SameFile(sourceInfo, targetInfo), nil
}
// getSubpathBindTarget builds the bind-mount target path for a subpath:
// <PodDir>/volume-subpaths/<VolumeName>/<ContainerName>/<VolumeMountIndex>.
func getSubpathBindTarget(subpath Subpath) string {
	// containerName is DNS label, i.e. safe as a directory name.
	mountIndex := strconv.Itoa(subpath.VolumeMountIndex)
	return filepath.Join(
		subpath.PodDir,
		containerSubPathDirectoryName,
		subpath.VolumeName,
		subpath.ContainerName,
		mountIndex,
	)
}
// doBindSubPath bind-mounts the subpath onto its bind target and returns the
// target path. If the target already carries the expected mount, the existing
// target is returned without mounting again. When the mount attempt fails, the
// partially prepared subpath is cleaned up before the error is returned.
func doBindSubPath(mounter mount.Interface, subpath Subpath) (hostPath string, err error) {
	// Linux, kubelet runs on the host:
	// - safely open the subpath
	// - bind-mount /proc/<pid of kubelet>/fd/<fd> to subpath target
	// User can't change /proc/<pid of kubelet>/fd/<fd> to point to a bad place.

	// Evaluate all symlinks here once for all subsequent functions.
	resolvedVolumePath, err := filepath.EvalSymlinks(subpath.VolumePath)
	if err != nil {
		return "", fmt.Errorf("error resolving symlinks in %q: %v", subpath.VolumePath, err)
	}
	resolvedPath, err := filepath.EvalSymlinks(subpath.Path)
	if err != nil {
		return "", fmt.Errorf("error resolving symlinks in %q: %v", subpath.Path, err)
	}
	klog.V(5).Infof("doBindSubPath %q (%q) for volumepath %q", subpath.Path, resolvedPath, subpath.VolumePath)
	subpath.VolumePath, subpath.Path = resolvedVolumePath, resolvedPath

	fd, err := safeOpenSubPath(mounter, subpath)
	if err != nil {
		return "", err
	}
	defer syscall.Close(fd)

	mounted, target, err := prepareSubpathTarget(mounter, subpath)
	if err != nil {
		return "", err
	}
	if mounted {
		return target, nil
	}

	// From here on, any exit that did not complete the mount cleans up the subpath.
	bound := false
	defer func() {
		if bound {
			return
		}
		klog.V(4).Infof("doBindSubPath() failed for %q, cleaning up subpath", target)
		if cleanErr := cleanSubPath(mounter, subpath); cleanErr != nil {
			klog.Errorf("Failed to clean subpath %q: %v", target, cleanErr)
		}
	}()

	// The mount source is the already opened descriptor, addressed through /proc.
	mountSource := fmt.Sprintf("/proc/%d/fd/%v", os.Getpid(), fd)

	// Do the bind mount
	klog.V(5).Infof("bind mounting %q at %q", mountSource, target)
	err = mounter.MountSensitiveWithoutSystemdWithMountFlags(
		mountSource,
		target,
		"", /*fstype*/
		[]string{"bind"},
		nil, /* sensitiveOptions */
		[]string{"--no-canonicalize"},
	)
	if err != nil {
		return "", fmt.Errorf("error mounting %s: %s", subpath.Path, err)
	}
	bound = true

	klog.V(3).Infof("Bound SubPath %s into %s", subpath.Path, target)
	return target, nil
}
// doCleanSubPaths cleans up every subpath bind-mount recorded for volumeName
// under podDir and then removes the now-empty bookkeeping directories. A
// missing subpath directory is not an error. Any failure to read, clean or
// remove a directory aborts the cleanup and is returned.
// This implementation is shared between Linux and NsEnter
func doCleanSubPaths(mounter mount.Interface, podDir string, volumeName string) error {
// scan /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/*
subPathDir := filepath.Join(podDir, containerSubPathDirectoryName, volumeName)
klog.V(4).Infof("Cleaning up subpath mounts for %s", subPathDir)
containerDirs, err := ioutil.ReadDir(subPathDir)
if err != nil {
if os.IsNotExist(err) {
// Nothing was ever bind-mounted for this volume.
return nil
}
return fmt.Errorf("error reading %s: %s", subPathDir, err)
}
for _, containerDir := range containerDirs {
if !containerDir.IsDir() {
klog.V(4).Infof("Container file is not a directory: %s", containerDir.Name())
continue
}
klog.V(4).Infof("Cleaning up subpath mounts for container %s", containerDir.Name())
// scan /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/<container name>/*
fullContainerDirPath := filepath.Join(subPathDir, containerDir.Name())
// The original traversal method here was ReadDir, which was not so robust to handle some error such as "stale NFS file handle",
// so it was replaced with filepath.Walk in a later patch, which can pass through error and handled by the callback WalkFunc.
// After go 1.16, WalkDir was introduced, it's more effective than Walk because the callback WalkDirFunc is called before
// reading a directory, making it save some time when a container's subPath contains lots of dirs.
// See https://github.com/kubernetes/kubernetes/pull/71804 and https://github.com/kubernetes/kubernetes/issues/107667 for more details.
// The error WalkDir hands to the callback is deliberately ignored (third parameter is "_").
err = filepath.WalkDir(fullContainerDirPath, func(path string, info os.DirEntry, _ error) error {
if path == fullContainerDirPath {
// Skip top level directory
return nil
}
// pass through errors and let doCleanSubPath handle them
if err = doCleanSubPath(mounter, fullContainerDirPath, filepath.Base(path)); err != nil {
return err
}
// We need to check that info is not nil. This may happen when the incoming err is not nil due to stale mounts or permission errors.
if info != nil && info.IsDir() {
// skip subdirs of the volume: it only matters the first level to unmount, otherwise it would try to unmount subdir of the volume
return filepath.SkipDir
}
return nil
})
if err != nil {
return fmt.Errorf("error processing %s: %s", fullContainerDirPath, err)
}
// Whole container has been processed, remove its directory.
if err := os.Remove(fullContainerDirPath); err != nil {
return fmt.Errorf("error deleting %s: %s", fullContainerDirPath, err)
}
klog.V(5).Infof("Removed %s", fullContainerDirPath)
}
// Whole pod volume subpaths have been cleaned up, remove its subpath directory.
if err := os.Remove(subPathDir); err != nil {
return fmt.Errorf("error deleting %s: %s", subPathDir, err)
}
klog.V(5).Infof("Removed %s", subPathDir)
// Remove entire subpath directory if it's the last one.
// Errors matching os.IsExist are tolerated here — presumably the
// "directory not empty" case when other volumes still have subpaths.
podSubPathDir := filepath.Join(podDir, containerSubPathDirectoryName)
if err := os.Remove(podSubPathDir); err != nil && !os.IsExist(err) {
return fmt.Errorf("error deleting %s: %s", podSubPathDir, err)
}
klog.V(5).Infof("Removed %s", podSubPathDir)
return nil
}
// doCleanSubPath tears down a single subpath bind mount located at
// <fullContainerDirPath>/<subPathIndex> by calling mount.CleanupMountPoint.
func doCleanSubPath(mounter mount.Interface, fullContainerDirPath, subPathIndex string) error {
	// process /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/<container name>/<subPathName>
	klog.V(4).Infof("Cleaning up subpath mounts for subpath %v", subPathIndex)

	mountPoint := filepath.Join(fullContainerDirPath, subPathIndex)
	err := mount.CleanupMountPoint(mountPoint, mounter, true)
	if err != nil {
		return fmt.Errorf("error cleaning subpath mount %s: %s", mountPoint, err)
	}

	klog.V(4).Infof("Successfully cleaned subpath directory %s", mountPoint)
	return nil
}
// cleanSubPath tears down the bind mount of one subpath and then removes the
// directories leading to it, from the container directory up to the pod
// directory, as long as they are empty.
func cleanSubPath(mounter mount.Interface, subpath Subpath) error {
	containerDir := filepath.Join(subpath.PodDir, containerSubPathDirectoryName, subpath.VolumeName, subpath.ContainerName)
	mountIndex := strconv.Itoa(subpath.VolumeMountIndex)

	// Clean the subdir bind mount; errors matching os.IsNotExist are ignored.
	err := doCleanSubPath(mounter, containerDir, mountIndex)
	if err != nil && !os.IsNotExist(err) {
		return err
	}

	// Recursively remove directories if empty
	return removeEmptyDirs(subpath.PodDir, containerDir)
}
// removeEmptyDirs works backwards from endDir to baseDir and removes each
// directory if it is empty. It stops once it encounters a directory that has
// content. baseDir itself is never removed.
//
// It returns an error when endDir is not within baseDir, when a visited path
// exists but is not a directory, or when stat/remove fails for a reason other
// than "does not exist" / "not empty".
func removeEmptyDirs(baseDir, endDir string) error {
	if !mount.PathWithinBase(endDir, baseDir) {
		return fmt.Errorf("endDir %q is not within baseDir %q", endDir, baseDir)
	}

	// The loop below terminates on a plain string comparison with baseDir.
	// Normalize both paths first so that a trailing separator or a "." / ".."
	// element cannot make the walk miss baseDir and continue removing
	// directories above it.
	baseDir = filepath.Clean(baseDir)
	endDir = filepath.Clean(endDir)

	for curDir := endDir; curDir != baseDir; curDir = filepath.Dir(curDir) {
		s, err := os.Stat(curDir)
		if err != nil {
			if os.IsNotExist(err) {
				// Already gone; keep walking up towards baseDir.
				klog.V(5).Infof("curDir %q doesn't exist, skipping", curDir)
				continue
			}
			return fmt.Errorf("error stat %q: %v", curDir, err)
		}
		if !s.IsDir() {
			return fmt.Errorf("path %q not a directory", curDir)
		}

		err = os.Remove(curDir)
		if os.IsExist(err) {
			// os.Remove failed with an error matching os.IsExist, which is how
			// a non-empty directory is detected here; stop at this level.
			klog.V(5).Infof("Directory %q not empty, not removing", curDir)
			break
		} else if err != nil {
			return fmt.Errorf("error removing directory %q: %v", curDir, err)
		}
		klog.V(5).Infof("Removed directory %q", curDir)
	}
	return nil
}
// doSafeMakeDir creates the directory pathname (and any missing parents below
// base) one path component at a time, using file descriptors so that a
// component cannot be swapped for a symlink while the directories are being
// created. Every created directory gets the permissions given in perm,
// independent of the current umask.
//
// It returns nil when pathname already exists as a directory, an
// *os.PathError with ENOTDIR when it exists but is not a directory, and an
// error when pathname or the existing part of it lies outside base or when any
// directory cannot be created, opened or chmod-ed.
//
// This implementation is shared between Linux and NsEnterMounter. Both pathname
// and base must be either already resolved symlinks or they will be resolved in
// kubelet's mount namespace (in case it runs containerized).
func doSafeMakeDir(pathname string, base string, perm os.FileMode) error {
	klog.V(4).Infof("Creating directory %q within base %q", pathname, base)

	if !mount.PathWithinBase(pathname, base) {
		return fmt.Errorf("path %s is outside of allowed base %s", pathname, base)
	}

	// Quick check if the directory already exists
	s, err := os.Stat(pathname)
	if err == nil {
		// Path exists
		if s.IsDir() {
			// The directory already exists. It can be outside of the parent,
			// but there is no race-proof check.
			klog.V(4).Infof("Directory %s already exists", pathname)
			return nil
		}
		return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR}
	}

	// Find all existing directories
	existingPath, toCreate, err := findExistingPrefix(base, pathname)
	if err != nil {
		return fmt.Errorf("error opening directory %s: %s", pathname, err)
	}
	// Ensure the existing directory is inside allowed base
	fullExistingPath, err := filepath.EvalSymlinks(existingPath)
	if err != nil {
		return fmt.Errorf("error opening directory %s: %s", existingPath, err)
	}
	if !mount.PathWithinBase(fullExistingPath, base) {
		// err is nil at this point, so the message must name the base.
		return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, base)
	}

	klog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...))
	parentFD, err := doSafeOpen(fullExistingPath, base)
	if err != nil {
		return fmt.Errorf("cannot open directory %s: %s", existingPath, err)
	}
	childFD := -1
	// Close whichever descriptors are still open when the function returns.
	defer func() {
		if parentFD != -1 {
			if err = syscall.Close(parentFD); err != nil {
				klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err)
			}
		}
		if childFD != -1 {
			if err = syscall.Close(childFD); err != nil {
				klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", childFD, pathname, err)
			}
		}
	}()

	currentPath := fullExistingPath
	// create the directories one by one, making sure nobody can change
	// created directory into symlink.
	for _, dir := range toCreate {
		currentPath = filepath.Join(currentPath, dir)
		klog.V(4).Infof("Creating %s", dir)
		// NOTE(review): currentPath is built from fullExistingPath; if that is
		// absolute, mkdirat(2) ignores parentFD for this call — confirm that
		// this is intended rather than passing dir relative to parentFD.
		err = syscall.Mkdirat(parentFD, currentPath, uint32(perm))
		if err != nil {
			return fmt.Errorf("cannot create directory %s: %s", currentPath, err)
		}
		// Dive into the created directory
		childFD, err = syscall.Openat(parentFD, dir, nofollowFlags|unix.O_CLOEXEC, 0)
		if err != nil {
			return fmt.Errorf("cannot open %s: %s", currentPath, err)
		}
		// We can be sure that childFD is safe to use. It could be changed
		// by user after Mkdirat() and before Openat(), however:
		// - it could not be changed to symlink - we use nofollowFlags
		// - it could be changed to a file (or device, pipe, socket, ...)
		//   but either subsequent Mkdirat() fails or we mount this file
		//   to user's container. Security is not violated in both cases
		//   and user either gets error or the file that it can already access.

		if err = syscall.Close(parentFD); err != nil {
			klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err)
		}
		parentFD = childFD
		childFD = -1

		// mkdirat(..., perm) above was affected by current umask and we must
		// apply the right permissions to every created directory (the last
		// one is what will be available to the container as subpath) so user
		// can read/write it.
		// parentFD is the last created directory.

		// Translate perm (os.FileMode) to uint32 that fchmod() expects
		kernelPerm := uint32(perm & os.ModePerm)
		if perm&os.ModeSetgid > 0 {
			kernelPerm |= syscall.S_ISGID
		}
		if perm&os.ModeSetuid > 0 {
			kernelPerm |= syscall.S_ISUID
		}
		if perm&os.ModeSticky > 0 {
			kernelPerm |= syscall.S_ISVTX
		}
		if err = syscall.Fchmod(parentFD, kernelPerm); err != nil {
			return fmt.Errorf("chmod %q failed: %s", currentPath, err)
		}
	}

	return nil
}
// findExistingPrefix finds prefix of pathname that exists. In addition, it
// returns list of remaining directories that don't exist yet.
//
// pathname is taken relative to base and walked one component at a time. On
// the first component that does not exist, the path walked so far and the
// remaining components are returned. When every component exists, pathname and
// an empty slice are returned. Any other failure is returned as an error.
func findExistingPrefix(base, pathname string) (string, []string, error) {
rel, err := filepath.Rel(base, pathname)
if err != nil {
return base, nil, err
}
dirs := strings.Split(rel, string(filepath.Separator))
// Do OpenAt in a loop to find the first non-existing dir. Resolve symlinks.
// This should be faster than looping through all dirs and calling os.Stat()
// on each of them, as the symlinks are resolved only once with OpenAt().
currentPath := base
fd, err := syscall.Open(currentPath, syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
if err != nil {
return pathname, nil, fmt.Errorf("error opening %s: %s", currentPath, err)
}
// fd is reassigned in the loop below; the deferred closure closes whichever
// descriptor it holds when the function returns.
defer func() {
if err = syscall.Close(fd); err != nil {
klog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err)
}
}()
for i, dir := range dirs {
// Using O_PATH here will prevent hangs in case user replaces directory with
// fifo
childFD, err := syscall.Openat(fd, dir, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
if os.IsNotExist(err) {
// dir is the first missing component: everything from here on must be created.
return currentPath, dirs[i:], nil
}
return base, nil, err
}
// The parent descriptor is no longer needed once the child is open.
if err = syscall.Close(fd); err != nil {
klog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err)
}
fd = childFD
currentPath = filepath.Join(currentPath, dir)
}
return pathname, []string{}, nil
}
// doSafeOpen opens pathname and returns its file descriptor; the caller is
// responsible for closing it. On failure it returns -1 and an error.
// Symlinks are disallowed (pathname must already resolve symlinks),
// and the path must be within the base directory.
// This implementation is shared between Linux and NsEnterMounter
func doSafeOpen(pathname string, base string) (int, error) {
pathname = filepath.Clean(pathname)
base = filepath.Clean(base)
// Calculate segments to follow
subpath, err := filepath.Rel(base, pathname)
if err != nil {
return -1, err
}
segments := strings.Split(subpath, string(filepath.Separator))
// Assumption: base is the only directory that we have under control.
// Base dir is not allowed to be a symlink.
parentFD, err := syscall.Open(base, nofollowFlags|unix.O_CLOEXEC, 0)
if err != nil {
return -1, fmt.Errorf("cannot open directory %s: %s", base, err)
}
// Close parentFD on return unless ownership was handed to the caller
// (signalled by setting it to -1 at the end).
defer func() {
if parentFD != -1 {
if err = syscall.Close(parentFD); err != nil {
klog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", parentFD, pathname, err)
}
}
}()
childFD := -1
// Close a child descriptor that was opened but not yet promoted to parentFD.
defer func() {
if childFD != -1 {
if err = syscall.Close(childFD); err != nil {
klog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", childFD, pathname, err)
}
}
}()
currentPath := base
// Follow the segments one by one using openat() to make
// sure the user cannot change already existing directories into symlinks.
for _, seg := range segments {
var deviceStat unix.Stat_t
currentPath = filepath.Join(currentPath, seg)
if !mount.PathWithinBase(currentPath, base) {
return -1, fmt.Errorf("path %s is outside of allowed base %s", currentPath, base)
}
// Trigger auto mount if it's an auto-mounted directory, ignore error if not a directory.
// Notice the trailing slash is mandatory, see "automount" in openat(2) and open_by_handle_at(2).
unix.Fstatat(parentFD, seg+"/", &deviceStat, unix.AT_SYMLINK_NOFOLLOW)
klog.V(5).Infof("Opening path %s", currentPath)
childFD, err = syscall.Openat(parentFD, seg, openFDFlags|unix.O_CLOEXEC, 0)
if err != nil {
return -1, fmt.Errorf("cannot open %s: %s", currentPath, err)
}
// Inspect what was actually opened and reject it if it is a symlink.
err := unix.Fstat(childFD, &deviceStat)
if err != nil {
return -1, fmt.Errorf("error running fstat on %s with %v", currentPath, err)
}
fileFmt := deviceStat.Mode & syscall.S_IFMT
if fileFmt == syscall.S_IFLNK {
return -1, fmt.Errorf("unexpected symlink found %s", currentPath)
}
// Close parentFD
// NOTE(review): when this Close fails, the deferred function above calls
// Close on the same parentFD value again — confirm this is acceptable.
if err = syscall.Close(parentFD); err != nil {
return -1, fmt.Errorf("closing fd for %q failed: %v", filepath.Dir(currentPath), err)
}
// Set child to new parent
parentFD = childFD
childFD = -1
}
// We made it to the end, return this fd, don't close it
finalFD := parentFD
parentFD = -1
return finalFD, nil
}

View File

@ -0,0 +1,55 @@
//go:build !linux && !windows
// +build !linux,!windows
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package subpath
import (
"errors"
"os"
"k8s.io/mount-utils"
"k8s.io/utils/nsenter"
)
// subpath is the Interface implementation for platforms other than Linux and
// Windows.
type subpath struct{}
// errUnsupported is the error returned by the subpath methods on this platform.
var errUnsupported = errors.New("util/subpath on this platform is not supported")
// New returns a subpath.Interface for the current system. The mounter argument
// is ignored.
func New(mount.Interface) Interface {
return &subpath{}
}
// NewNSEnter exists to satisfy the compiler by providing the constructor for
// all OS choices. However, NSEnter is only valid on Linux, so this always
// returns nil.
func NewNSEnter(mounter mount.Interface, ne *nsenter.Nsenter, rootDir string) Interface {
return nil
}
// PrepareSafeSubpath is not supported on this platform. It returns
// subPath.Path, a nil cleanup action and errUnsupported.
func (sp *subpath) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
return subPath.Path, nil, errUnsupported
}
// CleanSubPaths is not supported on this platform and always returns errUnsupported.
func (sp *subpath) CleanSubPaths(podDir string, volumeName string) error {
return errUnsupported
}
// SafeMakeDir is not supported on this platform and always returns errUnsupported.
func (sp *subpath) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
return errUnsupported
}

View File

@ -0,0 +1,383 @@
//go:build windows
// +build windows
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package subpath
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"k8s.io/klog/v2"
"k8s.io/mount-utils"
"k8s.io/utils/nsenter"
)
// MaxPathLength is the maximum length of a Windows path. Normally, it is 260, but if long paths are enabled,
// the maximum is 32,767.
const MaxPathLength = 32767
// subpath is the Windows implementation of Interface.
type subpath struct{}
// New returns a subpath.Interface for the current system. The mounter argument
// is ignored.
func New(mount.Interface) Interface {
return &subpath{}
}
// NewNSEnter exists to satisfy the compiler by providing the constructor for
// all OS choices. However, NSEnter is only valid on Linux, so this always
// returns nil.
func NewNSEnter(mounter mount.Interface, ne *nsenter.Nsenter, rootDir string) Interface {
return nil
}
// isDriveLetterorEmptyPath reports whether the given path is empty or ends
// with ":", ":\" or ":\\".
func isDriveLetterorEmptyPath(path string) bool {
	if path == "" {
		return true
	}
	for _, suffix := range []string{":\\\\", ":", ":\\"} {
		if strings.HasSuffix(path, suffix) {
			return true
		}
	}
	return false
}
// isDeviceOrUncPath reports whether the given path starts with "Volume", with
// one of the device path prefixes "\\?\" or "\\.\", or with "UNC". Any other
// path yields false.
func isDeviceOrUncPath(path string) bool {
	for _, prefix := range []string{"Volume", "\\\\?\\", "\\\\.\\", "UNC"} {
		if strings.HasPrefix(path, prefix) {
			return true
		}
	}
	return false
}
// getUpperPath strips one trailing path separator from path, if present, and
// returns the parent directory of the result.
func getUpperPath(path string) string {
	trimmed := strings.TrimSuffix(path, fmt.Sprintf("%c", filepath.Separator))
	return filepath.Dir(trimmed)
}
// isLinkPath asks PowerShell for the LinkType of the item at path and reports
// whether it is non-empty, i.e. whether the directory/file is a link.
// LinkType could be SymbolicLink, Junction, or HardLink. The path is handed to
// PowerShell through the "linkpath" environment variable. A failure of the
// command is returned as an error.
func isLinkPath(path string) (bool, error) {
	cmd := exec.Command("powershell", "/c", "$ErrorActionPreference = 'Stop'; (Get-Item -Force -LiteralPath $env:linkpath).LinkType")
	cmd.Env = append(os.Environ(), fmt.Sprintf("linkpath=%s", path))
	klog.V(8).Infof("Executing command: %q", cmd.String())

	out, err := cmd.CombinedOutput()
	if err != nil {
		return false, err
	}

	linkType := strings.TrimSpace(string(out))
	return linkType != "", nil
}
// evalSymlink returns the path name after the evaluation of any symbolic links.
// If the path after evaluation is a device path or network connection, the original path is returned.
// The path is first normalized; device/UNC paths, drive-letter paths and empty
// paths are returned as they are.
func evalSymlink(path string) (string, error) {
path = mount.NormalizeWindowsPath(path)
if isDeviceOrUncPath(path) || isDriveLetterorEmptyPath(path) {
klog.V(4).Infof("Path '%s' is not a symlink, return its original form.", path)
return path, nil
}
// Walk upwards from path until a link is found. upperpath is the part still
// being examined; base accumulates the trailing components already checked.
upperpath := path
base := ""
for i := 0; i < MaxPathLength; i++ {
isLink, err := isLinkPath(upperpath)
if err != nil {
return "", err
}
if isLink {
break
}
// continue to check next layer
base = filepath.Join(filepath.Base(upperpath), base)
upperpath = getUpperPath(upperpath)
if isDriveLetterorEmptyPath(upperpath) {
klog.V(4).Infof("Path '%s' is not a symlink, return its original form.", path)
return path, nil
}
}
// This command will give the target path of a given symlink
// The -Force parameter will allow Get-Item to also evaluate hidden folders, like AppData.
cmd := exec.Command("powershell", "/c", "$ErrorActionPreference = 'Stop'; (Get-Item -Force -LiteralPath $env:linkpath).Target")
cmd.Env = append(os.Environ(), fmt.Sprintf("linkpath=%s", upperpath))
klog.V(8).Infof("Executing command: %q", cmd.String())
output, err := cmd.CombinedOutput()
if err != nil {
return "", err
}
klog.V(4).Infof("evaluate path %s: symlink from %s to %s", path, upperpath, string(output))
linkedPath := strings.TrimSpace(string(output))
if linkedPath == "" || isDeviceOrUncPath(linkedPath) {
klog.V(4).Infof("Path '%s' has a target %s. Return its original form.", path, linkedPath)
return path, nil
}
// If the target is not an absolute path, join it with the current upperpath
if !filepath.IsAbs(linkedPath) {
linkedPath = filepath.Join(getUpperPath(upperpath), linkedPath)
}
// The link target may itself contain links: resolve it recursively, then
// re-append the components that followed the link in the original path.
nextLink, err := evalSymlink(linkedPath)
if err != nil {
return path, err
}
return filepath.Join(nextLink, base), nil
}
// lockAndCheckSubPath checks whether hostPath is within volumePath after both
// have had their symlinks evaluated.
// This func will lock all intermediate subpath directories; the returned
// handles need to be closed outside of this func after the container started.
// An empty volumePath or hostPath yields an empty handle list and no error.
func lockAndCheckSubPath(volumePath, hostPath string) ([]uintptr, error) {
	if volumePath == "" || hostPath == "" {
		return []uintptr{}, nil
	}

	resolvedSubPath, err := evalSymlink(hostPath)
	if err != nil {
		return []uintptr{}, fmt.Errorf("cannot evaluate link %s: %s", hostPath, err)
	}

	resolvedVolumePath, err := evalSymlink(volumePath)
	if err != nil {
		return []uintptr{}, fmt.Errorf("cannot read link %s: %s", volumePath, err)
	}

	return lockAndCheckSubPathWithoutSymlink(resolvedVolumePath, resolvedSubPath)
}
// lockAndCheckSubPathWithoutSymlink locks all intermediate subPath directories
// and checks that they are all within volumePath.
// volumePath & subPath should not contain any symlink, otherwise it will return error.
// The handles locked so far are returned even when an error is reported for a
// later path component.
func lockAndCheckSubPathWithoutSymlink(volumePath, subPath string) ([]uintptr, error) {
	if volumePath == "" || subPath == "" {
		return []uintptr{}, nil
	}

	// get relative path to volumePath
	relSubPath, err := filepath.Rel(volumePath, subPath)
	if err != nil {
		return []uintptr{}, fmt.Errorf("Rel(%s, %s) error: %v", volumePath, subPath, err)
	}
	if mount.StartsWithBackstep(relSubPath) {
		return []uintptr{}, fmt.Errorf("SubPath %q not within volume path %q", subPath, volumePath)
	}
	if relSubPath == "." {
		// volumePath and subPath are equal
		return []uintptr{}, nil
	}

	handles := []uintptr{}
	current := volumePath
	for _, component := range strings.Split(relSubPath, string(os.PathSeparator)) {
		// lock intermediate subPath directory first
		current = filepath.Join(current, component)
		handle, err := lockPath(current)
		if err != nil {
			return handles, fmt.Errorf("cannot lock path %s: %s", current, err)
		}
		handles = append(handles, handle)

		// make sure intermediate subPath directory does not contain symlink any more
		info, err := os.Lstat(current)
		if err != nil {
			return handles, fmt.Errorf("Lstat(%q) error: %v", current, err)
		}
		if info.Mode()&os.ModeSymlink != 0 {
			return handles, fmt.Errorf("subpath %q is an unexpected symlink after EvalSymlinks", current)
		}

		if !mount.PathWithinBase(current, volumePath) {
			return handles, fmt.Errorf("SubPath %q not within volume path %q", current, volumePath)
		}
	}

	return handles, nil
}
// unlockPath unlocks directories by closing every handle in fileHandles.
// A nil or empty slice is a no-op.
func unlockPath(fileHandles []uintptr) {
	for i := range fileHandles {
		syscall.CloseHandle(syscall.Handle(fileHandles[i]))
	}
}
// lockPath locks a directory or symlink by opening a handle to it and returns
// that handle; call syscall.CloseHandle(handle) to unlock the path.
//
// An empty path yields syscall.InvalidHandle together with
// syscall.ERROR_FILE_NOT_FOUND. A path that cannot be converted to UTF-16
// yields syscall.InvalidHandle and the conversion error.
func lockPath(path string) (uintptr, error) {
	if path == "" {
		return uintptr(syscall.InvalidHandle), syscall.ERROR_FILE_NOT_FOUND
	}

	namePtr, err := syscall.UTF16PtrFromString(path)
	if err != nil {
		return uintptr(syscall.InvalidHandle), err
	}

	var (
		// Read-only access, shared with other readers only.
		desiredAccess = uint32(syscall.GENERIC_READ)
		shareMode     = uint32(syscall.FILE_SHARE_READ)
		// Never create anything: the path must already exist.
		disposition = uint32(syscall.OPEN_EXISTING)
		// Per the Win32 CreateFile documentation, BACKUP_SEMANTICS is
		// required to obtain a handle to a directory and OPEN_REPARSE_POINT
		// opens the link itself rather than following it.
		attributes = uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS | syscall.FILE_FLAG_OPEN_REPARSE_POINT)
	)

	handle, err := syscall.CreateFile(namePtr, desiredAccess, shareMode, nil, disposition, attributes, 0)
	return uintptr(handle), err
}
// PrepareSafeSubpath locks all directories in subPath and checks that they are
// not symlinks.
//
// It returns subPath.Path unchanged as the new host path, a cleanup function
// that unlocks the directories (to be called when the container starts), and
// any error from the lock-and-check step. The cleanup function is returned
// even when an error occurred, so handles acquired before the failure can
// still be released.
func (sp *subpath) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
	lockedHandles, lockErr := lockAndCheckSubPath(subPath.VolumePath, subPath.Path)

	release := func() {
		unlockPath(lockedHandles)
	}

	return subPath.Path, release, lockErr
}
// CleanSubPaths is a no-op on Windows: no bind-mounts for subpaths are
// necessary on this platform, so there is nothing to clean up. Both arguments
// are ignored and the result is always nil.
func (sp *subpath) CleanSubPaths(podDir string, volumeName string) error {
return nil
}
// SafeMakeDir makes sure that the created directory does not escape the given
// base directory by mis-using symlinks.
//
// Symlinks in base are resolved first; subdir is then joined onto the resolved
// base and the directory is created via doSafeMakeDir with permissions perm.
func (sp *subpath) SafeMakeDir(subdir string, base string, perm os.FileMode) error {
	resolvedBase, err := evalSymlink(base)
	if err != nil {
		return fmt.Errorf("error resolving symlinks in %s: %s", base, err)
	}
	return doSafeMakeDir(filepath.Join(resolvedBase, subdir), resolvedBase, perm)
}
// doSafeMakeDir creates the directory pathname (and any missing parents below
// base) with permissions perm, refusing to create anything outside of base.
//
// It returns nil when pathname already exists as a directory, and an
// *os.PathError with ENOTDIR when pathname exists but is not a directory.
// The already-existing part of the path is locked top to bottom before
// anything is created, and every newly created directory is locked right
// after creation and re-checked for being a symlink. All locks are held until
// the function returns.
func doSafeMakeDir(pathname string, base string, perm os.FileMode) error {
	klog.V(4).Infof("Creating directory %q within base %q", pathname, base)

	if !mount.PathWithinBase(pathname, base) {
		return fmt.Errorf("path %s is outside of allowed base %s", pathname, base)
	}

	// Quick check if the directory already exists
	s, err := os.Stat(pathname)
	if err == nil {
		// Path exists
		if s.IsDir() {
			// The directory already exists. It can be outside of the parent,
			// but there is no race-proof check.
			klog.V(4).Infof("Directory %s already exists", pathname)
			return nil
		}
		return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR}
	}

	// Find all existing directories
	existingPath, toCreate, err := findExistingPrefix(base, pathname)
	if err != nil {
		return fmt.Errorf("error opening directory %s: %s", pathname, err)
	}
	if len(toCreate) == 0 {
		return nil
	}

	// Ensure the existing directory is inside allowed base
	fullExistingPath, err := evalSymlink(existingPath)
	if err != nil {
		return fmt.Errorf("error opening existing directory %s: %s", existingPath, err)
	}
	fullBasePath, err := evalSymlink(base)
	if err != nil {
		return fmt.Errorf("cannot read link %s: %s", base, err)
	}
	if !mount.PathWithinBase(fullExistingPath, fullBasePath) {
		// Report the base that was violated; err is always nil at this point.
		return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, fullBasePath)
	}

	// lock all intermediate directories from fullBasePath to fullExistingPath (top to bottom)
	fileHandles, err := lockAndCheckSubPathWithoutSymlink(fullBasePath, fullExistingPath)
	// Registered before the error check on purpose: handles acquired before a
	// failure are returned together with the error and must be released too.
	defer unlockPath(fileHandles)
	if err != nil {
		return err
	}

	klog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...))
	currentPath := fullExistingPath
	// create the directories one by one, making sure nobody can change
	// created directory into symlink by lock that directory immediately
	for _, dir := range toCreate {
		currentPath = filepath.Join(currentPath, dir)
		klog.V(4).Infof("Creating %s", dir)
		if err := os.Mkdir(currentPath, perm); err != nil {
			return fmt.Errorf("cannot create directory %s: %s", currentPath, err)
		}
		handle, err := lockPath(currentPath)
		if err != nil {
			return fmt.Errorf("cannot lock path %s: %s", currentPath, err)
		}
		// Deferred inside the loop deliberately: each new directory stays
		// locked until the whole chain has been created.
		defer syscall.CloseHandle(syscall.Handle(handle))
		// make sure newly created directory does not contain symlink after lock
		stat, err := os.Lstat(currentPath)
		if err != nil {
			return fmt.Errorf("Lstat(%q) error: %v", currentPath, err)
		}
		if stat.Mode()&os.ModeSymlink != 0 {
			return fmt.Errorf("subpath %q is an unexpected symlink after Mkdir", currentPath)
		}
	}

	return nil
}
// findExistingPrefix finds the longest prefix of pathname that exists on disk.
//
// It returns that prefix together with the list of remaining path components
// that do not exist yet. When pathname exists in full (or equals base) the
// prefix is pathname itself and the list is empty. On error, base is returned
// as the prefix along with a nil list.
func findExistingPrefix(base, pathname string) (string, []string, error) {
	rel, err := filepath.Rel(base, pathname)
	if err != nil {
		return base, nil, err
	}
	if mount.StartsWithBackstep(rel) {
		return base, nil, fmt.Errorf("pathname(%s) is not within base(%s)", pathname, base)
	}
	if rel == "." {
		// base and pathname refer to the same location.
		return pathname, []string{}, nil
	}

	components := strings.Split(rel, string(filepath.Separator))
	existing := base
	for idx := range components {
		candidate := filepath.Join(existing, components[idx])
		_, statErr := os.Lstat(candidate)
		switch {
		case statErr == nil:
			// candidate exists; descend into it.
			existing = candidate
		case os.IsNotExist(statErr):
			// Everything from this component on still has to be created.
			return existing, components[idx:], nil
		default:
			return base, nil, statErr
		}
	}

	return pathname, []string{}, nil
}

View File

@ -0,0 +1,206 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package types defines types used only by volume components
package types
import (
"errors"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/mount-utils"
)
// UniquePodName defines the type to key pods off of. It is a distinct named
// type whose underlying type is types.UID.
type UniquePodName types.UID
// UniquePVCName defines the type to key PVCs off of. It is a distinct named
// type whose underlying type is types.UID.
type UniquePVCName types.UID
// GeneratedOperations contains the operation that is created as well as
// supporting functions required for the operation executor
type GeneratedOperations struct {
// Name of operation - could be used for resetting shared exponential backoff
OperationName string
// OperationFunc performs the operation and reports its outcome as an
// OperationContext. Run calls it unconditionally, so it must not be nil.
OperationFunc func() (context OperationContext)
// EventRecorderFunc, when non-nil, is deferred by Run and receives a pointer
// to the event error that Run returns.
EventRecorderFunc func(*error)
// CompleteFunc, when non-nil, is deferred by Run and receives pointers to the
// DetailedErr and Migrated fields of the OperationContext that Run holds.
CompleteFunc func(CompleteFuncParam)
}
// OperationContext is the result reported by a generated operation's
// OperationFunc.
type OperationContext struct {
	// EventErr is the error returned by GeneratedOperations.Run as eventErr.
	EventErr error
	// DetailedErr is the error returned by GeneratedOperations.Run as
	// detailedErr.
	DetailedErr error
	// Migrated is handed to CompleteFunc by pointer.
	// NOTE(review): presumably marks operations served by a migrated plugin —
	// confirm against callers.
	Migrated bool
}

// NewOperationContext returns an OperationContext populated with the given
// event error, detailed error and migrated flag.
func NewOperationContext(eventErr, detailedErr error, migrated bool) OperationContext {
	var opCtx OperationContext
	opCtx.EventErr = eventErr
	opCtx.DetailedErr = detailedErr
	opCtx.Migrated = migrated
	return opCtx
}
// CompleteFuncParam is the argument passed to GeneratedOperations.CompleteFunc.
// Both fields are pointers, so the callback reads the values as they are at
// the time it runs rather than at the time the parameter was built.
type CompleteFuncParam struct {
// Err points to the detailed error of the operation.
Err *error
// Migrated points to the Migrated flag of the operation's OperationContext.
Migrated *bool
}
// Run executes the operation and its supporting functions.
//
// It calls OperationFunc and returns the EventErr and DetailedErr of the
// OperationContext it produced. Deferred calls execute in reverse order of
// registration: the panic handler first, then EventRecorderFunc (if set),
// then CompleteFunc (if set).
func (o *GeneratedOperations) Run() (eventErr, detailedErr error) {
var context OperationContext
if o.CompleteFunc != nil {
// The parameter holds pointers into context, so CompleteFunc sees the
// values stored there by OperationFunc even though c is built beforehand.
c := CompleteFuncParam{
Err: &context.DetailedErr,
Migrated: &context.Migrated,
}
defer o.CompleteFunc(c)
}
if o.EventRecorderFunc != nil {
// eventErr is a named result, so the recorder observes the value set by
// the return statement below.
defer o.EventRecorderFunc(&eventErr)
}
// Handle panic, if any, from operationFunc()
// NOTE(review): the recovery helper is given &detailedErr, whereas
// CompleteFunc reads context.DetailedErr; after a panic CompleteFunc
// presumably still sees a nil error — confirm this is intended.
defer runtime.RecoverFromPanic(&detailedErr)
context = o.OperationFunc()
return context.EventErr, context.DetailedErr
}
// FailedPrecondition is the error used to indicate that a CSI operation
// returned a failed-precondition error.
type FailedPrecondition struct {
	msg string
}

// Error returns the message the error was created with.
func (e *FailedPrecondition) Error() string { return e.msg }

// NewFailedPreconditionError returns a new FailedPrecondition error carrying
// msg.
func NewFailedPreconditionError(msg string) *FailedPrecondition {
	fp := new(FailedPrecondition)
	fp.msg = msg
	return fp
}

// IsFailedPreconditionError reports whether err, or any error it wraps, is a
// *FailedPrecondition.
func IsFailedPreconditionError(err error) bool {
	target := (*FailedPrecondition)(nil)
	return errors.As(err, &target)
}
// InfeasibleError errors are a subset of OperationFinished or final error
// codes. In terms of CSI, this usually means that the operation is not
// possible in the current state with the given arguments.
type InfeasibleError struct {
	msg string
}

// Error returns the message the error was created with.
func (e *InfeasibleError) Error() string { return e.msg }

// NewInfeasibleError returns a new instance of InfeasibleError carrying msg.
func NewInfeasibleError(msg string) *InfeasibleError {
	infeasible := new(InfeasibleError)
	infeasible.msg = msg
	return infeasible
}

// IsInfeasibleError reports whether err, or any error it wraps, is an
// *InfeasibleError.
func IsInfeasibleError(err error) bool {
	target := (*InfeasibleError)(nil)
	return errors.As(err, &target)
}
// OperationNotSupported is an error type carrying a plain message; it can be
// detected with IsOperationNotSupportedError.
type OperationNotSupported struct {
	msg string
}

// Error returns the message the error was created with.
func (e *OperationNotSupported) Error() string { return e.msg }

// NewOperationNotSupportedError returns a new OperationNotSupported error
// carrying msg.
func NewOperationNotSupportedError(msg string) *OperationNotSupported {
	unsupported := new(OperationNotSupported)
	unsupported.msg = msg
	return unsupported
}

// IsOperationNotSupportedError reports whether err, or any error it wraps, is
// an *OperationNotSupported.
func IsOperationNotSupportedError(err error) bool {
	target := (*OperationNotSupported)(nil)
	return errors.As(err, &target)
}
// TransientOperationFailure indicates operation failed with a transient error
// and may fix itself when retried.
type TransientOperationFailure struct {
	msg string
}

// Error returns the message the error was created with.
func (err *TransientOperationFailure) Error() string {
	return err.msg
}

// NewTransientOperationFailure creates an instance of TransientOperationFailure error
func NewTransientOperationFailure(msg string) *TransientOperationFailure {
	return &TransientOperationFailure{msg: msg}
}

// UncertainProgressError indicates operation failed with a non-final error
// and operation may be in-progress in background.
type UncertainProgressError struct {
	msg string
}

// Error returns the message the error was created with.
func (err *UncertainProgressError) Error() string {
	return err.msg
}

// NewUncertainProgressError creates an instance of UncertainProgressError type
func NewUncertainProgressError(msg string) *UncertainProgressError {
	return &UncertainProgressError{msg: msg}
}

// IsOperationFinishedError checks if given error is of type that indicates
// operation is finished with a FINAL error.
//
// An error counts as final unless it is, or wraps, an *UncertainProgressError
// or a *TransientOperationFailure. The chain is inspected with errors.As so
// that adding context via %w does not turn a non-final error into a final
// one. A nil error is reported as finished.
func IsOperationFinishedError(err error) bool {
	var uncertain *UncertainProgressError
	if errors.As(err, &uncertain) {
		return false
	}
	var transient *TransientOperationFailure
	if errors.As(err, &transient) {
		return false
	}
	return true
}
// IsFilesystemMismatchError checks if mount failed because the requested
// filesystem on the PVC and the actual filesystem on disk did not match.
//
// It reports true only when err is, or wraps, a mount.MountError whose Type is
// mount.FilesystemMismatch.
func IsFilesystemMismatchError(err error) bool {
	var mountErr mount.MountError
	if !errors.As(err, &mountErr) {
		return false
	}
	return mountErr.Type == mount.FilesystemMismatch
}
// IsUncertainProgressError checks if given error is of type that indicates
// operation might be in-progress in background.
//
// It reports true when err is, or wraps, an *UncertainProgressError. The
// chain is inspected with errors.As so that an error wrapped with %w is still
// recognised. A nil error yields false.
func IsUncertainProgressError(err error) bool {
	var uncertain *UncertainProgressError
	return errors.As(err, &uncertain)
}
const (
// VolumeResizerKey is the key that will be used to store the resizer used
// for resizing a PVC. The generated key/value pair will be added
// as an annotation to the PVC.
VolumeResizerKey = "volume.kubernetes.io/storage-resizer"
)

691
e2e/vendor/k8s.io/kubernetes/pkg/volume/util/util.go generated vendored Normal file
View File

@ -0,0 +1,691 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"context"
"fmt"
"os"
"path/filepath"
"reflect"
"strings"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
apiruntime "k8s.io/apimachinery/pkg/runtime"
utypes "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/api/legacyscheme"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/securitycontext"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/util/types"
"k8s.io/kubernetes/pkg/volume/util/volumepathhandler"
"k8s.io/mount-utils"
"k8s.io/utils/io"
utilstrings "k8s.io/utils/strings"
)
const (
// readyFileName is the name of the marker file that SetReady creates and
// IsReady looks for inside a directory.
readyFileName = "ready"
// ControllerManagedAttachAnnotation is the key of the annotation on Node
// objects that indicates attach/detach operations for the node should be
// managed by the attach/detach controller
ControllerManagedAttachAnnotation string = "volumes.kubernetes.io/controller-managed-attach-detach"
// MountsInGlobalPDPath is name of the directory appended to a volume plugin
// name to create the place for volume mounts in the global PD path.
MountsInGlobalPDPath = "mounts"
// VolumeGidAnnotationKey is the key of the annotation on the PersistentVolume
// object that specifies a supplemental GID.
VolumeGidAnnotationKey = "pv.beta.kubernetes.io/gid"
// VolumeDynamicallyCreatedByKey is the key of the annotation on PersistentVolume
// object created dynamically
VolumeDynamicallyCreatedByKey = "kubernetes.io/createdby"
// kubernetesPluginPathPrefix is the prefix of kubernetes plugin mount paths.
kubernetesPluginPathPrefix = "/plugins/kubernetes.io/"
)
// IsReady reports whether a regular file called 'ready' exists in the given
// directory.
//
// It returns false when the file cannot be stat'ed. When the path exists but
// is not a regular file, an error is logged and false is returned.
func IsReady(dir string) bool {
	marker := filepath.Join(dir, readyFileName)
	info, err := os.Stat(marker)
	switch {
	case err != nil:
		return false
	case info.Mode().IsRegular():
		return true
	default:
		klog.Errorf("ready-file is not a file: %s", marker)
		return false
	}
}
// SetReady creates a file called 'ready' in the given directory, creating the
// directory (mode 0750) first if needed.
//
// Failures are only logged: the function returns nothing, and it gives up
// after the first step that fails.
func SetReady(dir string) {
	if mkErr := os.MkdirAll(dir, 0750); mkErr != nil && !os.IsExist(mkErr) {
		klog.Errorf("Can't mkdir %s: %v", dir, mkErr)
		return
	}

	marker := filepath.Join(dir, readyFileName)
	if f, createErr := os.Create(marker); createErr != nil {
		klog.Errorf("Can't touch %s: %v", marker, createErr)
	} else {
		f.Close()
	}
}
// GetSecretForPV locates a secret by name and namespace, verifies the secret
// type, and returns the secret data as a string map.
//
// The secret's Type must equal volumePluginName. On any failure (nil client,
// lookup error, type mismatch) an empty, non-nil map is returned together with
// the error.
func GetSecretForPV(secretNamespace, secretName, volumePluginName string, kubeClient clientset.Interface) (map[string]string, error) {
	result := map[string]string{}
	if kubeClient == nil {
		return result, fmt.Errorf("cannot get kube client")
	}

	obj, err := kubeClient.CoreV1().Secrets(secretNamespace).Get(context.TODO(), secretName, metav1.GetOptions{})
	if err != nil {
		return result, err
	}
	if wantType := v1.SecretType(volumePluginName); obj.Type != wantType {
		return result, fmt.Errorf("cannot get secret of type %s", volumePluginName)
	}

	// Secret values are raw bytes; expose them as strings.
	for key, raw := range obj.Data {
		result[key] = string(raw)
	}
	return result, nil
}
// LoadPodFromFile reads the file at filePath, decodes its content and returns
// the resulting Pod.
//
// An error is returned when filePath is empty, the file cannot be read, the
// file is empty, or its content cannot be decoded into a Pod.
func LoadPodFromFile(filePath string) (*v1.Pod, error) {
	if filePath == "" {
		return nil, fmt.Errorf("file path not specified")
	}

	raw, err := os.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("failed to read file path %s: %+v", filePath, err)
	}
	if len(raw) == 0 {
		return nil, fmt.Errorf("file was empty: %s", filePath)
	}

	decoded := new(v1.Pod)
	if decodeErr := apiruntime.DecodeInto(legacyscheme.Codecs.UniversalDecoder(), raw, decoded); decodeErr != nil {
		return nil, fmt.Errorf("failed decoding file: %v", decodeErr)
	}
	return decoded, nil
}
// CalculateTimeoutForVolume calculates the time for a Recycler pod to complete
// a recycle operation.
//
// The result is timeoutIncrement multiplied by the volume's storage capacity
// in whole Gi (integer division, so a partial Gi is dropped), or
// minimumTimeout, whichever is greater.
func CalculateTimeoutForVolume(minimumTimeout, timeoutIncrement int, pv *v1.PersistentVolume) int64 {
	oneGi := resource.MustParse("1Gi")
	capacity := pv.Spec.Capacity[v1.ResourceStorage]
	wholeGi := capacity.Value() / oneGi.Value()

	floor := int64(minimumTimeout)
	if scaled := wholeGi * int64(timeoutIncrement); scaled >= floor {
		return scaled
	}
	return floor
}
// GetPath returns the path reported by the mounter, or an error naming the
// mounter's concrete type when that path is empty.
func GetPath(mounter volume.Mounter) (string, error) {
	if p := mounter.GetPath(); p != "" {
		return p, nil
	}
	return "", fmt.Errorf("path is empty %s", reflect.TypeOf(mounter).String())
}
// UnmountViaEmptyDir delegates the tear down operation for secret, configmap,
// git_repo and downwardapi to empty_dir.
//
// It obtains a wrapper unmounter from host for the given volume and pod and
// asks it to tear down dir; an error creating the wrapper is returned as is.
func UnmountViaEmptyDir(dir string, host volume.VolumeHost, volName string, volSpec volume.Spec, podUID utypes.UID) error {
	klog.V(3).Infof("Tearing down volume %v for pod %v at %v", volName, podUID, dir)

	// The wrapped EmptyDir unmounter performs the actual teardown.
	unmounter, err := host.NewWrapperUnmounter(volName, volSpec, podUID)
	if err == nil {
		return unmounter.TearDownAt(dir)
	}
	return err
}
// MountOptionFromSpec extracts mount options from the volume spec and joins
// them with the supplied options.
//
// When the spec has a PersistentVolume, the (beta) mount-option annotation is
// used if present, split on commas; otherwise the PV's Spec.MountOptions are
// used if non-empty. In all other cases the supplied options are returned
// unchanged.
func MountOptionFromSpec(spec *volume.Spec, options ...string) []string {
	pv := spec.PersistentVolume
	if pv == nil {
		return options
	}

	// The beta annotation wins over the spec field.
	if annotated, found := pv.Annotations[v1.MountOptionAnnotation]; found {
		return JoinMountOptions(strings.Split(annotated, ","), options)
	}
	if specOptions := pv.Spec.MountOptions; len(specOptions) > 0 {
		return JoinMountOptions(specOptions, options)
	}
	return options
}
// JoinMountOptions joins user and system mount options, eliminating
// duplicates. Empty user options are dropped; system options are taken as
// given. The result is the list form of the combined set.
func JoinMountOptions(userOptions []string, systemOptions []string) []string {
	merged := sets.New[string]()
	for i := range userOptions {
		if userOptions[i] == "" {
			continue
		}
		merged.Insert(userOptions[i])
	}
	merged.Insert(systemOptions...)
	return sets.List(merged)
}
// ContainsAccessMode reports whether mode is one of the entries of modes.
func ContainsAccessMode(modes []v1.PersistentVolumeAccessMode, mode v1.PersistentVolumeAccessMode) bool {
	for i := range modes {
		if modes[i] == mode {
			return true
		}
	}
	return false
}
// ContainsAllAccessModes reports whether every mode in requestedModes is
// present in indexedModes. An empty requestedModes yields true.
func ContainsAllAccessModes(indexedModes []v1.PersistentVolumeAccessMode, requestedModes []v1.PersistentVolumeAccessMode) bool {
	for i := range requestedModes {
		if ContainsAccessMode(indexedModes, requestedModes[i]) {
			continue
		}
		return false
	}
	return true
}
// GetWindowsPath converts path to a Windows path: every forward slash becomes
// a backslash, and a result that starts with a backslash is prefixed with the
// "c:" drive.
func GetWindowsPath(path string) string {
	converted := strings.Replace(path, "/", `\`, -1)
	if !strings.HasPrefix(converted, `\`) {
		return converted
	}
	return "c:" + converted
}
// GetUniquePodName returns a unique identifier to reference a pod by.
// The identifier is the pod's UID converted to types.UniquePodName.
func GetUniquePodName(pod *v1.Pod) types.UniquePodName {
return types.UniquePodName(pod.UID)
}
// GetUniqueVolumeName returns a unique name representing the volume/plugin,
// formed as "<pluginName>/<volumeName>".
//
// The caller should ensure that volumeName is a name/ID uniquely identifying
// the actual backing device, directory, path, etc. for a particular volume.
// The returned name can be used to uniquely reference the volume, for example
// to prevent operations (attach/detach or mount/unmount) from being triggered
// on the same volume.
func GetUniqueVolumeName(pluginName, volumeName string) v1.UniqueVolumeName {
	return v1.UniqueVolumeName(pluginName + "/" + volumeName)
}
// GetUniqueVolumeNameFromSpecWithPod returns a unique volume name that
// includes the pod name, formed as "<plugin name>/<pod name>-<spec name>".
// This is useful to generate different names for different pods on the same
// volume.
func GetUniqueVolumeNameFromSpecWithPod(
	podName types.UniquePodName, volumePlugin volume.VolumePlugin, volumeSpec *volume.Spec) v1.UniqueVolumeName {
	name := fmt.Sprintf("%s/%v-%s", volumePlugin.GetPluginName(), podName, volumeSpec.Name())
	return v1.UniqueVolumeName(name)
}
// GetUniqueVolumeNameFromSpec uses the given VolumePlugin to generate a unique
// name representing the volume defined in the specified volume spec.
//
// The returned name can be used to uniquely reference the actual backing
// device, directory, path, etc. referenced by the given volumeSpec. An error
// is returned when volumePlugin is nil, or when the plugin fails to provide a
// volume name or provides an empty one.
func GetUniqueVolumeNameFromSpec(
	volumePlugin volume.VolumePlugin,
	volumeSpec *volume.Spec) (v1.UniqueVolumeName, error) {
	if volumePlugin == nil {
		return "", fmt.Errorf(
			"volumePlugin should not be nil. volumeSpec.Name=%q",
			volumeSpec.Name())
	}

	volumeName, err := volumePlugin.GetVolumeName(volumeSpec)
	if err == nil && volumeName != "" {
		return GetUniqueVolumeName(volumePlugin.GetPluginName(), volumeName), nil
	}

	return "", fmt.Errorf(
		"failed to GetVolumeName from volumePlugin for volumeSpec %q err=%v",
		volumeSpec.Name(),
		err)
}
// IsPodTerminated checks if the pod is terminated.
//
// A pod counts as terminated when its phase is Failed or Succeeded, or when it
// has a deletion timestamp and none of its init, regular or ephemeral
// containers is running.
func IsPodTerminated(pod *v1.Pod, podStatus v1.PodStatus) bool {
	// TODO: the guarantees provided by kubelet status are not sufficient to guarantee it's safe to ignore a deleted pod,
	// even if everything is notRunning (kubelet does not guarantee that when pod status is waiting that it isn't trying
	// to start a container).
	switch podStatus.Phase {
	case v1.PodFailed, v1.PodSucceeded:
		return true
	}

	if pod.DeletionTimestamp == nil {
		return false
	}
	return notRunning(podStatus.InitContainerStatuses) &&
		notRunning(podStatus.ContainerStatuses) &&
		notRunning(podStatus.EphemeralContainerStatuses)
}
// notRunning reports whether every status is terminated or waiting. An empty
// status list yields true.
func notRunning(statuses []v1.ContainerStatus) bool {
	for i := range statuses {
		state := statuses[i].State
		if state.Terminated != nil || state.Waiting != nil {
			continue
		}
		return false
	}
	return true
}
// SplitUniqueName splits the unique name into plugin name and volume name.
//
// The unique name is expected to follow the format plugin_name/volume_name,
// where the plugin name is itself namespaced as described by the plugin
// interface, i.e. namespace/plugin containing exactly one '/'. The unique name
// therefore always has the form plugin_namespace/plugin/volume_name; anything
// after the second '/' belongs to the volume name. See the VolumePlugin
// interface description in k8s.io/kubernetes/pkg/volume/plugins.go and
// GetUniqueVolumeNameFromSpec, which constructs the unique volume names.
//
// An error is returned when the name has fewer than three components.
func SplitUniqueName(uniqueName v1.UniqueVolumeName) (string, string, error) {
	parts := strings.SplitN(string(uniqueName), "/", 3)
	if len(parts) != 3 {
		return "", "", fmt.Errorf("cannot split volume unique name %s to plugin/volume components", uniqueName)
	}
	return parts[0] + "/" + parts[1], parts[2], nil
}
// NewSafeFormatAndMountFromHost creates a new SafeFormatAndMount whose Mounter
// and Exec are taken from the given VolumeHost for pluginName.
func NewSafeFormatAndMountFromHost(pluginName string, host volume.VolumeHost) *mount.SafeFormatAndMount {
	return &mount.SafeFormatAndMount{
		Interface: host.GetMounter(pluginName),
		Exec:      host.GetExec(pluginName),
	}
}
// GetVolumeMode retrieves the VolumeMode from the PersistentVolume of
// volumeSpec.
//
// A nil spec, or a spec without a PersistentVolume (an inline volume), is
// reported as Filesystem to keep existing behavior. A PersistentVolume with no
// VolumeMode set results in an error.
func GetVolumeMode(volumeSpec *volume.Spec) (v1.PersistentVolumeMode, error) {
	if volumeSpec == nil || volumeSpec.PersistentVolume == nil {
		return v1.PersistentVolumeFilesystem, nil
	}

	mode := volumeSpec.PersistentVolume.Spec.VolumeMode
	if mode == nil {
		return "", fmt.Errorf("cannot get volumeMode for volume: %v", volumeSpec.Name())
	}
	return *mode, nil
}
// GetPersistentVolumeClaimQualifiedName returns a qualified name for pvc,
// built by utilstrings.JoinQualifiedName from the claim's namespace and name.
func GetPersistentVolumeClaimQualifiedName(claim *v1.PersistentVolumeClaim) string {
return utilstrings.JoinQualifiedName(claim.GetNamespace(), claim.GetName())
}
// CheckVolumeModeFilesystem checks the VolumeMode of volumeSpec.
//
// It returns false only when the mode is Block; any other mode yields true.
// When the mode cannot be determined, true is returned together with the
// error.
func CheckVolumeModeFilesystem(volumeSpec *volume.Spec) (bool, error) {
	mode, err := GetVolumeMode(volumeSpec)
	if err != nil {
		return true, err
	}
	return mode != v1.PersistentVolumeBlock, nil
}
// CheckPersistentVolumeClaimModeBlock checks the VolumeMode of pvc.
// It returns true when the mode is set and equals Block, otherwise false.
func CheckPersistentVolumeClaimModeBlock(pvc *v1.PersistentVolumeClaim) bool {
	mode := pvc.Spec.VolumeMode
	if mode == nil {
		return false
	}
	return *mode == v1.PersistentVolumeBlock
}
// IsWindowsUNCPath checks if path is prefixed with \\ when goos is "windows".
// This can be used to skip any processing of paths that point to SMB shares,
// local named pipes and local UNC paths. For any other goos it returns false.
func IsWindowsUNCPath(goos, path string) bool {
	return goos == "windows" && strings.HasPrefix(path, `\\`)
}
// IsWindowsLocalPath checks if path is a local path prefixed with "/" or "\",
// like "/foo/bar" or "\foo\bar".
//
// It returns false when goos is not "windows", when path is a UNC path, or
// when path contains a colon.
func IsWindowsLocalPath(goos, path string) bool {
	switch {
	case goos != "windows":
		return false
	case IsWindowsUNCPath(goos, path):
		return false
	case strings.Contains(path, ":"):
		return false
	}
	return strings.HasPrefix(path, `/`) || strings.HasPrefix(path, `\`)
}
// MakeAbsolutePath converts path to an absolute path according to goos.
//
// For any goos other than "windows" the path is rooted at "/" and cleaned.
// For "windows": a path containing a colon is returned unchanged; a path that
// starts with a slash or backslash but has no drive gets "c:" prepended;
// anything else gets "c:\" prepended.
func MakeAbsolutePath(goos, path string) string {
	switch {
	case goos != "windows":
		return filepath.Clean("/" + path)
	case strings.Contains(path, ":"):
		// A colon is taken to mean a drive is already present; give up.
		return path
	case strings.HasPrefix(path, "/"), strings.HasPrefix(path, "\\"):
		return "c:" + path
	default:
		return "c:\\" + path
	}
}
// MapBlockVolume is a utility function to provide a common way of mapping a
// block device path for a specified volume and pod. This function should be
// called by volume plugins that implement the
// volume.BlockVolumeMapper.Map() method.
//
// It performs three steps in order and stops at the first failure: map the
// device into the global map path (keyed by pod UID, as a bind mount), map it
// into the pod volume map path (keyed by volumeMapName), and finally attach
// the globally mapped file so the block device stays open.
func MapBlockVolume(
	blkUtil volumepathhandler.BlockVolumePathHandler,
	devicePath,
	globalMapPath,
	podVolumeMapPath,
	volumeMapName string,
	podUID utypes.UID,
) error {
	podUIDStr := string(podUID)

	// map devicePath to global node path as bind mount
	if err := blkUtil.MapDevice(devicePath, globalMapPath, podUIDStr, true /* bindMount */); err != nil {
		return fmt.Errorf("blkUtil.MapDevice failed. devicePath: %s, globalMapPath:%s, podUID: %s, bindMount: %v: %v",
			devicePath, globalMapPath, podUIDStr, true, err)
	}

	// map devicePath to pod volume path
	if err := blkUtil.MapDevice(devicePath, podVolumeMapPath, volumeMapName, false /* bindMount */); err != nil {
		return fmt.Errorf("blkUtil.MapDevice failed. devicePath: %s, podVolumeMapPath:%s, volumeMapName: %s, bindMount: %v: %v",
			devicePath, podVolumeMapPath, volumeMapName, false, err)
	}

	// Take file descriptor lock to keep a block device opened. Otherwise, there is a case
	// that the block device is silently removed and attached another device with the same name.
	// Container runtime can't handle this problem. To avoid unexpected condition fd lock
	// for the block device is required.
	if _, err := blkUtil.AttachFileDevice(filepath.Join(globalMapPath, podUIDStr)); err != nil {
		return fmt.Errorf("blkUtil.AttachFileDevice failed. globalMapPath:%s, podUID: %s: %v",
			globalMapPath, podUIDStr, err)
	}

	return nil
}
// UnmapBlockVolume is a utility function to provide a common way of unmapping
// a block device path for a specified volume and pod. It is the counterpart
// of MapBlockVolume and should be called by volume plugins when tearing down
// a block volume mapping.
//
// It performs three steps in order and stops at the first failure: release
// the file descriptor lock on the globally mapped file, unmap the device from
// the pod volume path (keyed by volumeMapName), and unmap it from the global
// node path (keyed by pod UID). Each error names the step that failed.
func UnmapBlockVolume(
	blkUtil volumepathhandler.BlockVolumePathHandler,
	globalUnmapPath,
	podDeviceUnmapPath,
	volumeMapName string,
	podUID utypes.UID,
) error {
	// Release file descriptor lock.
	err := blkUtil.DetachFileDevice(filepath.Join(globalUnmapPath, string(podUID)))
	if err != nil {
		return fmt.Errorf("blkUtil.DetachFileDevice failed. globalUnmapPath:%s, podUID: %s: %v",
			globalUnmapPath, string(podUID), err)
	}

	// unmap devicePath from pod volume path
	unmapDeviceErr := blkUtil.UnmapDevice(podDeviceUnmapPath, volumeMapName, false /* bindMount */)
	if unmapDeviceErr != nil {
		return fmt.Errorf("blkUtil.UnmapDevice failed. podDeviceUnmapPath:%s, volumeMapName: %s, bindMount: %v: %v",
			podDeviceUnmapPath, volumeMapName, false, unmapDeviceErr)
	}

	// unmap devicePath from global node path
	unmapDeviceErr = blkUtil.UnmapDevice(globalUnmapPath, string(podUID), true /* bindMount */)
	if unmapDeviceErr != nil {
		return fmt.Errorf("blkUtil.UnmapDevice failed. globalUnmapPath:%s, podUID: %s, bindMount: %v: %v",
			globalUnmapPath, string(podUID), true, unmapDeviceErr)
	}
	return nil
}
// IsLocalEphemeralVolume determines whether the argument is a local ephemeral
// volume vs. some other type.
// Local means the volume is using storage from the local disk that is managed
// by kubelet. Ephemeral means the lifecycle of the volume is the same as the
// Pod.
//
// The volume qualifies when it is a GitRepo volume, an EmptyDir volume using
// the default storage medium, or a ConfigMap volume.
func IsLocalEphemeralVolume(volume v1.Volume) bool {
	switch {
	case volume.GitRepo != nil:
		return true
	case volume.EmptyDir != nil && volume.EmptyDir.Medium == v1.StorageMediumDefault:
		return true
	default:
		return volume.ConfigMap != nil
	}
}
// GetPodVolumeNames returns names of volumes that are used in a pod, either as
// filesystem mount (mounts) or raw block device (devices).
//
// To save another sweep through containers, SELinux options are optionally
// collected too: when collectSELinuxOptions is true, seLinuxContainerContexts
// maps each mounted volume name to a copy of the effective SELinux options of
// every container mounting it (containers without SELinux options contribute
// nothing). The map is always non-nil.
func GetPodVolumeNames(pod *v1.Pod, collectSELinuxOptions bool) (mounts sets.Set[string], devices sets.Set[string], seLinuxContainerContexts map[string][]*v1.SELinuxOptions) {
	mounts = sets.New[string]()
	devices = sets.New[string]()
	seLinuxContainerContexts = map[string][]*v1.SELinuxOptions{}

	visit := func(container *v1.Container, _ podutil.ContainerType) bool {
		// Stays nil unless collection is requested and the container has an
		// effective security context.
		var containerSELinux *v1.SELinuxOptions
		if collectSELinuxOptions {
			if sc := securitycontext.DetermineEffectiveSecurityContext(pod, container); sc != nil {
				containerSELinux = sc.SELinuxOptions
			}
		}

		for i := range container.VolumeMounts {
			name := container.VolumeMounts[i].Name
			mounts.Insert(name)
			if containerSELinux != nil {
				seLinuxContainerContexts[name] = append(seLinuxContainerContexts[name], containerSELinux.DeepCopy())
			}
		}

		for i := range container.VolumeDevices {
			devices.Insert(container.VolumeDevices[i].Name)
		}

		// Always continue with the next container.
		return true
	}

	podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), visit)
	return mounts, devices, seLinuxContainerContexts
}
// FsUserFrom returns the FsUser of the pod, which is determined by the
// runAsUser attributes of its containers.
//
// The result is the single effective runAsUser shared by all visited
// containers. It is nil when any container has no effective runAsUser or when
// two containers disagree on it.
func FsUserFrom(pod *v1.Pod) *int64 {
	var common *int64
	podutil.VisitContainers(&pod.Spec, podutil.AllFeatureEnabledContainers(), func(container *v1.Container, _ podutil.ContainerType) bool {
		uid, known := securitycontext.DetermineEffectiveRunAsUser(pod, container)
		switch {
		case !known, common != nil && *common != *uid:
			// One container doesn't specify a user, or there is more than
			// one distinct UID: no common user exists, stop visiting.
			common = nil
			return false
		case common == nil:
			// First container seen: remember its user.
			common = uid
		}
		return true
	})
	return common
}
// HasMountRefs reports whether mountRefs contains at least one reference that
// is unrelated to mountPath, i.e. one that does not contain the path being
// searched for.
//
// TODO: this is a workaround for the unmount device issue caused by gci mounter.
// In GCI cluster, if gci mounter is used for mounting, the container started by mounter
// script will cause additional mounts created in the container. Since these mounts are
// irrelevant to the original mounts, they should be not considered when checking the
// mount references. The current solution is to filter out those mount paths that contain
// the k8s plugin suffix of original mount path.
func HasMountRefs(mountPath string, mountRefs []string) bool {
	// A mountPath typically is like
	//   /var/lib/kubelet/plugins/kubernetes.io/some-plugin/mounts/volume-XXXX
	// Mount refs can look like
	//   /home/somewhere/var/lib/kubelet/plugins/kubernetes.io/some-plugin/...
	// but if /var/lib/kubelet is mounted to a different device a ref might be like
	//   /mnt/some-other-place/kubelet/plugins/kubernetes.io/some-plugin/...
	// Neither of the above should be counted as a mount ref as those are handled
	// by the kubelet. What we're concerned about is a path like
	//   /data/local/some/manual/mount
	// As unmounting could interrupt usage from that mountpoint.
	//
	// So instead of searching for the entire /var/lib/... path, only the part
	// of mountPath starting at the kubernetes plugin path prefix is searched
	// for. If mountPath does not contain that prefix, the whole mountPath is
	// used instead.
	needle := mountPath
	if idx := strings.Index(mountPath, kubernetesPluginPathPrefix); idx >= 0 {
		needle = mountPath[idx:]
	}

	for i := range mountRefs {
		if strings.Contains(mountRefs[i], needle) {
			// Kubelet-managed reference; ignore it.
			continue
		}
		return true
	}
	return false
}
// IsMultiAttachAllowed returns false only when attaching this volume to multiple nodes is definitely not
// allowed/possible. In its current form, this function can only reliably say for which volumes it's definitely
// forbidden. If it returns true, it is not guaranteed that multi-attach is actually supported by the volume type
// and we must rely on the attacher to fail fast in such cases.
// Please see https://github.com/kubernetes/kubernetes/issues/40669 and https://github.com/kubernetes/kubernetes/pull/40148#discussion_r98055047
func IsMultiAttachAllowed(volumeSpec *volume.Spec) bool {
	if volumeSpec == nil {
		// Nothing is known about the volume; let the attacher fail later if
		// multi-attach turns out to be unsupported.
		return true
	}

	// Inline volume types which are known to fail slow or cause trouble when
	// trying to multi-attach.
	if inline := volumeSpec.Volume; inline != nil && (inline.AzureDisk != nil || inline.Cinder != nil) {
		return false
	}

	// Only for persistent volumes is there reliable information on whether
	// multi-attach is allowed. The individual volume implementations are
	// trusted to not allow unsupported access modes.
	pv := volumeSpec.PersistentVolume
	if pv == nil {
		// Unknown; let the attacher fail later if it's not supported.
		return true
	}

	modes := pv.Spec.AccessModes
	if len(modes) == 0 {
		// No access mode specified, so we don't know for sure. Let the
		// attacher fail if needed.
		return true
	}

	// Multi-attach is allowed as soon as one access mode permits use by many
	// nodes; otherwise it is forbidden.
	for _, mode := range modes {
		switch mode {
		case v1.ReadWriteMany, v1.ReadOnlyMany:
			return true
		}
	}
	return false
}
// IsAttachableVolume reports whether the given volumeSpec is an attachable
// volume: an attachable plugin must be found for the spec and that plugin must
// be able to create an attacher.
func IsAttachableVolume(volumeSpec *volume.Spec, volumePluginMgr *volume.VolumePluginMgr) bool {
	// The lookup error is intentionally dropped; only a non-nil plugin matters.
	plugin, _ := volumePluginMgr.FindAttachablePluginBySpec(volumeSpec)
	if plugin == nil {
		return false
	}
	attacher, err := plugin.NewAttacher()
	return err == nil && attacher != nil
}
// IsDeviceMountableVolume reports whether the given volumeSpec is a device
// mountable volume: a device mountable plugin must be found for the spec and
// that plugin must be able to create a device mounter.
func IsDeviceMountableVolume(volumeSpec *volume.Spec, volumePluginMgr *volume.VolumePluginMgr) bool {
	// The lookup error is intentionally dropped; only a non-nil plugin matters.
	plugin, _ := volumePluginMgr.FindDeviceMountablePluginBySpec(volumeSpec)
	if plugin == nil {
		return false
	}
	deviceMounter, err := plugin.NewDeviceMounter()
	return err == nil && deviceMounter != nil
}
// GetReliableMountRefs calls mounter.GetMountRefs and retries on IsInconsistentReadError.
// It polls every 10 milliseconds for up to one minute. Any other error from
// GetMountRefs aborts immediately; if the poll times out, the last
// inconsistent-read error is returned.
//
// To be used in volume reconstruction of volume plugins that don't have any protection
// against mounting a single volume on multiple nodes (such as attach/detach).
func GetReliableMountRefs(mounter mount.Interface, mountPath string) ([]string, error) {
	var (
		refs            []string
		inconsistentErr error
	)

	attempt := func() (bool, error) {
		var getErr error
		refs, getErr = mounter.GetMountRefs(mountPath)
		switch {
		case io.IsInconsistentReadError(getErr):
			// Remember the error and try again.
			inconsistentErr = getErr
			return false, nil
		case getErr != nil:
			return false, getErr
		default:
			return true, nil
		}
	}

	pollErr := wait.PollImmediate(10*time.Millisecond, time.Minute, attempt)
	if pollErr == wait.ErrWaitTimeout {
		// Report why the reads never succeeded rather than a bare timeout.
		return nil, inconsistentErr
	}
	return refs, pollErr
}

View File

@ -0,0 +1,295 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package volumepathhandler
import (
"fmt"
"os"
"path/filepath"
"k8s.io/klog/v2"
"k8s.io/mount-utils"
utilexec "k8s.io/utils/exec"
"k8s.io/apimachinery/pkg/types"
)
const (
// losetupPath is the name of the losetup executable that is run to create
// and remove loop devices.
losetupPath = "losetup"
// ErrDeviceNotFound is the message of the error returned when no loop
// device is found for a path. Callers in this package detect this case by
// comparing err.Error() against this string.
ErrDeviceNotFound = "device not found"
)
// BlockVolumePathHandler defines a set of operations for handling block volume-related operations
type BlockVolumePathHandler interface {
// MapDevice makes the block device at devicePath available as linkName
// under the specified map path, either as a symbolic link or, when
// bindMount is true, as a bind mount.
MapDevice(devicePath string, mapPath string, linkName string, bindMount bool) error
// UnmapDevice removes the symbolic link (or, when bindMount is true, the
// bind mount) named linkName under the specified map path.
UnmapDevice(mapPath string, linkName string, bindMount bool) error
// RemoveMapPath removes a file or directory on specified map path
RemoveMapPath(mapPath string) error
// IsSymlinkExist returns true if specified symbolic link exists
IsSymlinkExist(mapPath string) (bool, error)
// IsDeviceBindMountExist returns true if specified bind mount exists
IsDeviceBindMountExist(mapPath string) (bool, error)
// GetDeviceBindMountRefs searches bind mounts under global map path
GetDeviceBindMountRefs(devPath string, mapPath string) ([]string, error)
// FindGlobalMapPathUUIDFromPod finds {pod uuid} symbolic link under globalMapPath
// corresponding to map path symlink, and then return global map path with pod uuid.
FindGlobalMapPathUUIDFromPod(pluginDir, mapPath string, podUID types.UID) (string, error)
// AttachFileDevice takes a path to a regular file and makes it available as an
// attached block device.
AttachFileDevice(path string) (string, error)
// DetachFileDevice takes a path to the attached block device and
// detach it from block device.
DetachFileDevice(path string) error
// GetLoopDevice returns the full path to the loop device associated with the given path.
GetLoopDevice(path string) (string, error)
}
// NewBlockVolumePathHandler returns a BlockVolumePathHandler backed by a
// zero-value VolumePathHandler.
func NewBlockVolumePathHandler() BlockVolumePathHandler {
	return VolumePathHandler{}
}
// VolumePathHandler provides the path related operations for block volumes.
// It has no fields; NewBlockVolumePathHandler returns its zero value as a
// BlockVolumePathHandler.
type VolumePathHandler struct {
}
// MapDevice makes the block device at devicePath available as linkName under
// mapPath. The map path directory is created if needed; the device is then
// exposed as a bind mount when bindMount is true and as a symbolic link
// otherwise.
//
// devicePath and mapPath must be non-empty and mapPath must be absolute.
func (v VolumePathHandler) MapDevice(devicePath string, mapPath string, linkName string, bindMount bool) error {
	// Example of global map path:
	//   globalMapPath/linkName: plugins/kubernetes.io/{PluginName}/{DefaultKubeletVolumeDevicesDirName}/{volumePluginDependentPath}/{podUid}
	//   linkName: {podUid}
	//
	// Example of pod device map path:
	//   podDeviceMapPath/linkName: pods/{podUid}/{DefaultKubeletVolumeDevicesDirName}/{escapeQualifiedPluginName}/{volumeName}
	//   linkName: {volumeName}
	switch {
	case len(devicePath) == 0:
		return fmt.Errorf("failed to map device to map path. devicePath is empty")
	case len(mapPath) == 0:
		return fmt.Errorf("failed to map device to map path. mapPath is empty")
	case !filepath.IsAbs(mapPath):
		return fmt.Errorf("the map path should be absolute: map path: %s", mapPath)
	}

	klog.V(5).Infof("MapDevice: devicePath %s", devicePath)
	klog.V(5).Infof("MapDevice: mapPath %s", mapPath)
	klog.V(5).Infof("MapDevice: linkName %s", linkName)

	// A missing map path is fine (it is created below); any other stat
	// failure means the path cannot be validated.
	if _, statErr := os.Stat(mapPath); statErr != nil && !os.IsNotExist(statErr) {
		return fmt.Errorf("cannot validate map path: %s: %v", mapPath, statErr)
	}
	if mkdirErr := os.MkdirAll(mapPath, 0750); mkdirErr != nil {
		return fmt.Errorf("failed to mkdir %s: %v", mapPath, mkdirErr)
	}

	if !bindMount {
		return mapSymlinkDevice(v, devicePath, mapPath, linkName)
	}
	return mapBindMountDevice(v, devicePath, mapPath, linkName)
}
// mapBindMountDevice bind mounts devicePath onto mapPath/linkName. The mount
// target file is created when it does not exist yet. When the target already
// is a device file the mapping is assumed to be in place and nothing is done.
func mapBindMountDevice(v VolumePathHandler, devicePath string, mapPath string, linkName string) error {
	linkPath := filepath.Join(mapPath, linkName)

	info, statErr := os.Stat(linkPath)
	switch {
	case statErr == nil && info.Mode()&os.ModeDevice != 0:
		// The target already is a device file.
		// TODO: Need to check if this device file is actually the expected bind mount
		klog.Warningf("Warning: Map skipped because bind mount already exist on the path: %v", linkPath)
		return nil
	case statErr == nil:
		// The target exists as something else; reuse it as the mount point.
		klog.Warningf("Warning: file %s is already exist but not mounted, skip creating file", linkPath)
	case os.IsNotExist(statErr):
		// Create the file that serves as the bind mount target.
		target, openErr := os.OpenFile(linkPath, os.O_CREATE|os.O_RDWR, 0750)
		if openErr != nil {
			return fmt.Errorf("failed to open file %s: %v", linkPath, openErr)
		}
		if closeErr := target.Close(); closeErr != nil {
			return fmt.Errorf("failed to close file %s: %v", linkPath, closeErr)
		}
	default:
		return fmt.Errorf("failed to stat file %s: %v", linkPath, statErr)
	}

	// Bind mount the device onto the target file.
	mounter := &mount.SafeFormatAndMount{Interface: mount.New(""), Exec: utilexec.New()}
	mountErr := mounter.MountSensitiveWithoutSystemd(devicePath, linkPath, "" /* fsType */, []string{"bind"}, nil)
	if mountErr != nil {
		return fmt.Errorf("failed to bind mount devicePath: %s to linkPath %s: %v", devicePath, linkPath, mountErr)
	}
	return nil
}
// mapSymlinkDevice (re)creates the symbolic link mapPath/linkName pointing at
// devicePath.
func mapSymlinkDevice(v VolumePathHandler, devicePath string, mapPath string, linkName string) error {
	linkPath := filepath.Join(mapPath, linkName)

	// Any old symbolic link (or file) is removed before the new one is
	// created, because the current symbolic link is stale across node reboot.
	rmErr := os.Remove(linkPath)
	if rmErr == nil || os.IsNotExist(rmErr) {
		return os.Symlink(devicePath, linkPath)
	}
	return fmt.Errorf("failed to remove file %s: %v", linkPath, rmErr)
}
// UnmapDevice removes the mapping named linkName under mapPath: the bind mount
// when bindMount is true, the symbolic link otherwise. mapPath must be
// non-empty.
func (v VolumePathHandler) UnmapDevice(mapPath string, linkName string, bindMount bool) error {
	if mapPath == "" {
		return fmt.Errorf("failed to unmap device from map path. mapPath is empty")
	}
	klog.V(5).Infof("UnmapDevice: mapPath %s", mapPath)
	klog.V(5).Infof("UnmapDevice: linkName %s", linkName)

	if !bindMount {
		return unmapSymlinkDevice(v, mapPath, linkName)
	}
	return unmapBindMountDevice(v, mapPath, linkName)
}
// unmapBindMountDevice unmounts the bind mount at mapPath/linkName, if there
// is one, and removes the file that served as its mount target. A target that
// is already gone is not an error.
func unmapBindMountDevice(v VolumePathHandler, mapPath string, linkName string) error {
	linkPath := filepath.Join(mapPath, linkName)

	mounted, checkErr := v.IsDeviceBindMountExist(linkPath)
	if checkErr != nil {
		return checkErr
	}

	if mounted {
		mounter := &mount.SafeFormatAndMount{Interface: mount.New(""), Exec: utilexec.New()}
		if err := mounter.Unmount(linkPath); err != nil {
			return fmt.Errorf("failed to unmount linkPath %s: %v", linkPath, err)
		}
	} else {
		klog.Warningf("Warning: Unmap skipped because bind mount does not exist on the path: %v", linkPath)
		// Nothing to unmount; check whether a leftover file still has to be
		// cleaned up.
		if _, err := os.Stat(linkPath); err != nil {
			if os.IsNotExist(err) {
				// linkPath has already been removed
				return nil
			}
			return fmt.Errorf("failed to check if path %s exists: %v", linkPath, err)
		}
	}

	// Remove the (former) mount target file.
	if err := os.Remove(linkPath); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to remove file %s: %v", linkPath, err)
	}
	return nil
}
// unmapSymlinkDevice removes the symbolic link mapPath/linkName. When the path
// does not exist or is not a symbolic link, a warning is logged and nothing is
// removed.
func unmapSymlinkDevice(v VolumePathHandler, mapPath string, linkName string) error {
	linkPath := filepath.Join(mapPath, linkName)

	isLink, checkErr := v.IsSymlinkExist(linkPath)
	switch {
	case checkErr != nil:
		return checkErr
	case !isLink:
		klog.Warningf("Warning: Unmap skipped because symlink does not exist on the path: %v", linkPath)
		return nil
	}
	return os.Remove(linkPath)
}
// RemoveMapPath removes the file or directory at mapPath, including anything
// it contains. mapPath must be non-empty; a path that does not exist is not an
// error.
func (v VolumePathHandler) RemoveMapPath(mapPath string) error {
	if mapPath == "" {
		return fmt.Errorf("failed to remove map path. mapPath is empty")
	}
	klog.V(5).Infof("RemoveMapPath: mapPath %s", mapPath)

	if rmErr := os.RemoveAll(mapPath); rmErr != nil && !os.IsNotExist(rmErr) {
		return fmt.Errorf("failed to remove directory %s: %v", mapPath, rmErr)
	}
	return nil
}
// IsSymlinkExist returns true if the specified file exists and is a symbolic link.
// If the file doesn't exist, or exists but is not a symbolic link, it returns
// false with no error. In all other cases it returns false together with the
// error from Lstat().
func (v VolumePathHandler) IsSymlinkExist(mapPath string) (bool, error) {
	info, lstatErr := os.Lstat(mapPath)
	switch {
	case lstatErr == nil:
		// The file exists: report whether it is a symbolic link.
		return info.Mode()&os.ModeSymlink != 0, nil
	case os.IsNotExist(lstatErr):
		// A missing file is simply "no symlink".
		return false, nil
	default:
		return false, fmt.Errorf("failed to Lstat file %s: %v", mapPath, lstatErr)
	}
}
// IsDeviceBindMountExist returns true if the specified file exists and is a device.
// If the file doesn't exist, or exists but is not a device, it returns false
// with no error. In all other cases it returns false together with the error
// from Lstat().
func (v VolumePathHandler) IsDeviceBindMountExist(mapPath string) (bool, error) {
	info, lstatErr := os.Lstat(mapPath)
	switch {
	case lstatErr == nil:
		// The file exists: report whether it is a device file.
		return info.Mode()&os.ModeDevice != 0, nil
	case os.IsNotExist(lstatErr):
		// A missing file is simply "no bind mount".
		return false, nil
	default:
		return false, fmt.Errorf("failed to Lstat file %s: %v", mapPath, lstatErr)
	}
}
// GetDeviceBindMountRefs searches bind mounts under the global map path: it
// returns the full paths of all device files found directly inside mapPath.
// devPath is currently not consulted.
func (v VolumePathHandler) GetDeviceBindMountRefs(devPath string, mapPath string) ([]string, error) {
	entries, err := os.ReadDir(mapPath)
	if err != nil {
		return nil, err
	}

	var refs []string
	for _, entry := range entries {
		// Only device files are considered bind mounts.
		// TODO: Might need to check if the file is actually linked to devPath
		if entry.Type()&os.ModeDevice != 0 {
			refs = append(refs, filepath.Join(mapPath, entry.Name()))
		}
	}
	klog.V(5).Infof("GetDeviceBindMountRefs: refs %v", refs)
	return refs, nil
}

View File

@ -0,0 +1,236 @@
//go:build linux
// +build linux
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package volumepathhandler
import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"golang.org/x/sys/unix"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
)
// AttachFileDevice takes a path to a regular file and makes it available as an
// attached block device. An existing loop device for the path is reused;
// otherwise a new one is created. The path of the loop device is returned.
func (v VolumePathHandler) AttachFileDevice(path string) (string, error) {
	existing, lookupErr := v.GetLoopDevice(path)
	// "Device not found" only means a loop device has to be created below.
	if lookupErr != nil && lookupErr.Error() != ErrDeviceNotFound {
		return "", fmt.Errorf("GetLoopDevice failed for path %s: %v", path, lookupErr)
	}
	if existing != "" {
		return existing, nil
	}

	klog.V(4).Infof("Creating device for path: %s", path)
	created, makeErr := makeLoopDevice(path)
	if makeErr != nil {
		return "", fmt.Errorf("makeLoopDevice failed for path %s: %v", path, makeErr)
	}
	return created, nil
}
// DetachFileDevice looks up the loop device associated with the given path and
// removes it. When no loop device is found for the path, a warning is logged
// and nil is returned.
func (v VolumePathHandler) DetachFileDevice(path string) error {
	loopPath, err := v.GetLoopDevice(path)
	if err != nil {
		if err.Error() != ErrDeviceNotFound {
			return fmt.Errorf("GetLoopDevice failed for path %s: %v", path, err)
		}
		klog.Warningf("couldn't find loopback device which takes file descriptor lock. Skip detaching device. device path: %q", path)
		return nil
	}
	if len(loopPath) == 0 {
		// Nothing to detach.
		return nil
	}
	if err := removeLoopDevice(loopPath); err != nil {
		return fmt.Errorf("removeLoopDevice failed for path %s: %v", path, err)
	}
	return nil
}
// GetLoopDevice returns the full path to the loop device associated with the given path.
// A path that does not exist yields an error whose message is
// ErrDeviceNotFound; any other stat failure is reported as "not attachable".
func (v VolumePathHandler) GetLoopDevice(path string) (string, error) {
	if _, statErr := os.Stat(path); statErr != nil {
		if os.IsNotExist(statErr) {
			return "", errors.New(ErrDeviceNotFound)
		}
		return "", fmt.Errorf("not attachable: %v", statErr)
	}
	return getLoopDeviceFromSysfs(path)
}
// makeLoopDevice runs "losetup -f <path>" and then looks up, via sysfs, the
// loop device that is backed by path.
func makeLoopDevice(path string) (string, error) {
	losetupArgs := []string{"-f", path}
	output, runErr := exec.Command(losetupPath, losetupArgs...).CombinedOutput()
	if runErr != nil {
		klog.V(2).Infof("Failed device create command for path: %s %v %s", path, runErr, output)
		return "", fmt.Errorf("losetup %s failed: %v", strings.Join(losetupArgs, " "), runErr)
	}
	return getLoopDeviceFromSysfs(path)
}
// removeLoopDevice removes the specified loopback device by running
// "losetup -d <device>". A failing command is not treated as an error when the
// device no longer exists afterwards.
func removeLoopDevice(device string) error {
	output, runErr := exec.Command(losetupPath, "-d", device).CombinedOutput()
	if runErr == nil {
		return nil
	}
	// The device being gone is the desired end state.
	if _, statErr := os.Stat(device); os.IsNotExist(statErr) {
		return nil
	}
	klog.V(2).Infof("Failed to remove loopback device: %s: %v %s", device, runErr, output)
	return fmt.Errorf("losetup -d %s failed: %v", device, runErr)
}
// getLoopDeviceFromSysfs finds the loop device whose backing file is path by
// reading "/sys/block/loop*/loop/backing_file". The first loop device whose
// backing file equals path, or path with symlinks resolved, is returned as
// "/dev/<name>". If none matches, an error with message ErrDeviceNotFound is
// returned.
func getLoopDeviceFromSysfs(path string) (string, error) {
	// The given path may be a symlink; also match against its resolved form.
	realPath, err := filepath.EvalSymlinks(path)
	if err != nil {
		return "", fmt.Errorf("failed to evaluate path %s: %s", path, err)
	}

	loopDirs, err := filepath.Glob("/sys/block/loop*")
	if err != nil {
		return "", fmt.Errorf("failed to list loop devices in sysfs: %s", err)
	}

	for _, loopDir := range loopDirs {
		// The contents of this file is the absolute path of the backing file.
		contents, readErr := os.ReadFile(fmt.Sprintf("%s/loop/backing_file", loopDir))
		if readErr != nil {
			// Loop devices without a readable backing file are skipped.
			continue
		}

		// Return the first match.
		switch cleanBackingFilePath(string(contents)) {
		case path, realPath:
			return fmt.Sprintf("/dev/%s", filepath.Base(loopDir)), nil
		}
	}

	return "", errors.New(ErrDeviceNotFound)
}
// cleanBackingFilePath removes any trailing substrings that are not part of
// the backing file path: surrounding whitespace and a single trailing
// "(deleted)" marker, which is present when the backing file was deleted.
func cleanBackingFilePath(path string) string {
	const deletedMarker = "(deleted)"
	withoutMarker := strings.TrimSuffix(strings.TrimSpace(path), deletedMarker)
	return strings.TrimSpace(withoutMarker)
}
// FindGlobalMapPathUUIDFromPod finds {pod uuid} bind mount under globalMapPath
// corresponding to map path symlink, and then return global map path with pod uuid.
// (See pkg/volume/volume.go for details on a global map path and a pod device map path.)
// ex. mapPath symlink: pods/{podUid}/{DefaultKubeletVolumeDevicesDirName}/{escapeQualifiedPluginName}/{volumeName} -> /dev/sdX
//
//	globalMapPath/{pod uuid} bind mount: plugins/kubernetes.io/{PluginName}/{DefaultKubeletVolumeDevicesDirName}/{volumePluginDependentPath}/{pod uuid} -> /dev/sdX
//
// The whole tree under pluginDir is walked. If several entries match, the one
// visited last is returned; if none matches, the returned path is empty and
// the error is nil.
func (v VolumePathHandler) FindGlobalMapPathUUIDFromPod(pluginDir, mapPath string, podUID types.UID) (string, error) {
	var globalMapPathUUID string

	visit := func(path string, fi os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		// Only device files named after the pod UID are candidates.
		if fi.Mode()&os.ModeDevice == 0 || fi.Name() != string(podUID) {
			return nil
		}
		klog.V(5).Infof("FindGlobalMapPathFromPod: path %s, mapPath %s", path, mapPath)
		// A candidate that cannot be compared is skipped rather than
		// aborting the walk.
		same, cmpErr := compareBindMountAndSymlinks(path, mapPath)
		if cmpErr == nil && same {
			globalMapPathUUID = path
		}
		return nil
	}

	if err := filepath.Walk(pluginDir, visit); err != nil {
		return "", fmt.Errorf("FindGlobalMapPathUUIDFromPod failed: %v", err)
	}
	klog.V(5).Infof("FindGlobalMapPathFromPod: globalMapPathUUID %s", globalMapPathUUID)
	// The returned path contains global map path + {pod uuid}.
	return globalMapPathUUID, nil
}
// compareBindMountAndSymlinks reports whether the global path (bind mount) and
// the pod path (symlink) point to the same device, i.e. to devices with the
// same major/minor number. An error is returned when either side cannot be
// inspected.
func compareBindMountAndSymlinks(global, pod string) (bool, error) {
	// Major/minor number of the bind mounted device.
	globalDevNum, err := getDeviceMajorMinor(global)
	if err != nil {
		return false, fmt.Errorf("getDeviceMajorMinor failed for path %s: %v", global, err)
	}

	// Resolve the device the pod symlink points at ...
	podDevice, err := os.Readlink(pod)
	if err != nil {
		return false, fmt.Errorf("failed to readlink path %s: %v", pod, err)
	}
	// ... and get its major/minor number as well.
	podDevNum, err := getDeviceMajorMinor(podDevice)
	if err != nil {
		return false, fmt.Errorf("getDeviceMajorMinor failed for path %s: %v", podDevice, err)
	}

	klog.V(5).Infof("CompareBindMountAndSymlinks: devNumGlobal %s, devNumPod %s", globalDevNum, podDevNum)
	return globalDevNum == podDevNum, nil
}
// getDeviceMajorMinor stats the path and returns the major/minor number of its
// Rdev in the format below:
// major:minor (in hex)
// ex)
//
//	fc:10
func getDeviceMajorMinor(path string) (string, error) {
	var st unix.Stat_t
	if err := unix.Stat(path, &st); err != nil {
		return "", fmt.Errorf("failed to stat path %s: %v", path, err)
	}
	rdev := uint64(st.Rdev)
	return fmt.Sprintf("%x:%x", unix.Major(rdev), unix.Minor(rdev)), nil
}

View File

@ -0,0 +1,49 @@
//go:build !linux
// +build !linux
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package volumepathhandler
import (
"fmt"
"k8s.io/apimachinery/pkg/types"
)
// AttachFileDevice is the stub used on non-Linux builds, where attaching a
// regular file as a block device is not supported. It always returns an error.
func (v VolumePathHandler) AttachFileDevice(path string) (string, error) {
	unsupported := fmt.Errorf("AttachFileDevice not supported for this build.")
	return "", unsupported
}
// DetachFileDevice is the stub used on non-Linux builds, where detaching a
// file-backed block device is not supported. It always returns an error.
func (v VolumePathHandler) DetachFileDevice(path string) error {
	unsupported := fmt.Errorf("DetachFileDevice not supported for this build.")
	return unsupported
}
// GetLoopDevice is the stub used on non-Linux builds, where looking up the
// loop device associated with a path is not supported. It always returns an
// error.
func (v VolumePathHandler) GetLoopDevice(path string) (string, error) {
	unsupported := fmt.Errorf("GetLoopDevice not supported for this build.")
	return "", unsupported
}
// FindGlobalMapPathUUIDFromPod is the stub used on non-Linux builds, where
// finding the {pod uuid} bind mount under the global map path is not
// supported. It always returns an error.
func (v VolumePathHandler) FindGlobalMapPathUUIDFromPod(pluginDir, mapPath string, podUID types.UID) (string, error) {
	unsupported := fmt.Errorf("FindGlobalMapPathUUIDFromPod not supported for this build.")
	return "", unsupported
}