Update Unstage transaction to undo steps done in Stage

In unstage we now adhere to the transaction (or order of steps)
done in Stage. To enable this we stash the image meta data
into a local file on the staging path for use with unstage
request.

This helps in unmapping a stale map, in case the mount or
other steps in the transaction are complete.

Signed-off-by: ShyamsundarR <srangana@redhat.com>
This commit is contained in:
ShyamsundarR 2019-08-03 18:11:28 -04:00 committed by mergify[bot]
parent 44f7b1fe4b
commit 885ec7049d
4 changed files with 214 additions and 119 deletions

View File

@ -57,3 +57,12 @@ type ErrInvalidVolID struct {
func (e ErrInvalidVolID) Error() string {
return e.err.Error()
}
// ErrMissingStash is returned when the image metadata stash file is not found
type ErrMissingStash struct {
err error
}
func (e ErrMissingStash) Error() string {
return e.err.Error()
}

View File

@ -19,8 +19,6 @@ package rbd
import (
"fmt"
"os"
"os/exec"
"regexp"
"strings"
csicommon "github.com/ceph/ceph-csi/pkg/csi-common"
@ -45,9 +43,15 @@ type NodeServer struct {
// Implementation notes:
// - stagingTargetPath is the directory passed in the request where the volume needs to be staged
// - We stage the volume into a directory, named after the VolumeID inside stagingTargetPath if
// it is a file system
// it is a file system
// - We stage the volume into a file, named after the VolumeID inside stagingTargetPath if it is
// a block volume
// a block volume
// - Order of operation execution: (useful for defer stacking and when Unstaging to ensure steps
// are done in reverse, this is done in undoStagingTransaction)
// - Stash image metadata under staging path
// - Map the image (creates a device)
// - Create the staging file/directory under staging path
// - Stage the device (mount the device mapped for image)
func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error) {
if err := util.ValidateNodeStageVolumeRequest(req); err != nil {
return nil, err
@ -89,8 +93,8 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
isLegacyVolume = true
}
stagingTargetPath := req.GetStagingTargetPath()
stagingTargetPath += "/" + volID
stagingParentPath := req.GetStagingTargetPath()
stagingTargetPath := stagingParentPath + "/" + req.GetVolumeId()
idLk := nodeVolumeIDLocker.Lock(volID)
defer nodeVolumeIDLocker.Unlock(idLk, volID)
@ -113,22 +117,29 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
}
volOptions.RbdImageName = volName
isMounted := false
isStagePathCreated := false
devicePath := ""
// Stash image details prior to mapping the image (useful during Unstage as it has no
// voloptions passed to the RPC as per the CSI spec)
err = stashRBDImageMetadata(volOptions, stagingParentPath)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
defer func() {
if err != nil {
ns.undoStagingTransaction(stagingParentPath, devicePath, volID, isStagePathCreated, isMounted)
}
}()
// Mapping RBD image
devicePath, err := attachRBDImage(volOptions, cr)
devicePath, err = attachRBDImage(volOptions, cr)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
klog.V(4).Infof("rbd image: %s/%s was successfully mapped at %s\n", req.GetVolumeId(), volOptions.Pool, devicePath)
isMounted := false
isStagePathCreated := false
// if mounting to stagingpath fails unmap the rbd device. this wont leave any
// stale rbd device if unstage is not called
defer func() {
if err != nil {
ns.cleanupStagingPath(stagingTargetPath, devicePath, volID, isStagePathCreated, isMounted)
}
}()
err = ns.createStageMountPoint(stagingTargetPath, isBlock)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
@ -152,24 +163,40 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
return &csi.NodeStageVolumeResponse{}, nil
}
func (ns *NodeServer) cleanupStagingPath(stagingTargetPath, devicePath, volID string, isStagePathCreated, isMounted bool) {
func (ns *NodeServer) undoStagingTransaction(stagingParentPath, devicePath, volID string, isStagePathCreated, isMounted bool) {
var err error
stagingTargetPath := stagingParentPath + "/" + volID
if isMounted {
err = ns.mounter.Unmount(stagingTargetPath)
if err != nil {
klog.Errorf("failed to unmount stagingtargetPath: %s with error: %v", stagingTargetPath, err)
return
}
}
// remove the block file created on staging path
// remove the file/directory created on staging path
if isStagePathCreated {
err = os.Remove(stagingTargetPath)
if err != nil {
klog.Errorf("failed to remove stagingtargetPath: %s with error: %v", stagingTargetPath, err)
// continue on failure to unmap the image, as leaving stale images causes more issues than a stale file/directory
}
}
// Unmapping rbd device
if err = detachRBDDevice(devicePath); err != nil {
klog.Errorf("failed to unmap rbd device: %s for volume %s with error: %v", devicePath, volID, err)
if devicePath != "" {
err = detachRBDDevice(devicePath)
if err != nil {
klog.Errorf("failed to unmap rbd device: %s for volume %s with error: %v", devicePath, volID, err)
// continue on failure to delete the stash file, as kubernetes will fail to delete the staging path otherwise
}
}
// Cleanup the stashed image metadata
if err = cleanupRBDImageMetadataStash(stagingParentPath); err != nil {
klog.Errorf("failed to cleanup image metadata stash (%v)", err)
return
}
}
@ -190,8 +217,10 @@ func (ns *NodeServer) createStageMountPoint(mountPath string, isBlock bool) erro
err := os.Mkdir(mountPath, 0750)
if err != nil {
klog.Errorf("failed to create mountPath:%s with error: %v", mountPath, err)
return status.Error(codes.Internal, err.Error())
if !os.IsExist(err) {
klog.Errorf("failed to create mountPath:%s with error: %v", mountPath, err)
return status.Error(codes.Internal, err.Error())
}
}
return nil
@ -372,7 +401,7 @@ func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpu
return nil, status.Error(codes.Internal, err.Error())
}
klog.Infof("rbd: successfully unbinded volume %s from %s", req.GetVolumeId(), targetPath)
klog.Infof("rbd: successfully unbound volume %s from %s", req.GetVolumeId(), targetPath)
return &csi.NodeUnpublishVolumeResponse{}, nil
}
@ -384,111 +413,72 @@ func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstag
return nil, err
}
stagingTargetPath := req.GetStagingTargetPath()
stagingTargetPath += "/" + req.GetVolumeId()
stagingParentPath := req.GetStagingTargetPath()
stagingTargetPath := stagingParentPath + "/" + req.GetVolumeId()
notMnt, err := mount.IsNotMountPoint(ns.mounter, stagingTargetPath)
if err != nil {
if os.IsNotExist(err) {
// staging targetPath has already been deleted
klog.V(4).Infof("stagingTargetPath: %s has already been deleted", stagingTargetPath)
return &csi.NodeUnstageVolumeResponse{}, nil
if !os.IsNotExist(err) {
return nil, status.Error(codes.NotFound, err.Error())
}
return nil, status.Error(codes.NotFound, err.Error())
// Continue on ENOENT errors as we may still have the image mapped
notMnt = true
}
if notMnt {
// TODO: IsNotExist error should have been caught above
if err = os.Remove(stagingTargetPath); err != nil {
if !notMnt {
// Unmounting the image
err = ns.mounter.Unmount(stagingTargetPath)
if err != nil {
klog.V(3).Infof("failed to unmount targetPath: %s with error: %v", stagingTargetPath, err)
return nil, status.Error(codes.Internal, err.Error())
}
return &csi.NodeUnstageVolumeResponse{}, nil
}
// Unmount the volume
devicePath, cnt, err := mount.GetDeviceNameFromMount(ns.mounter, stagingTargetPath)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
if err = ns.unmount(stagingTargetPath, devicePath, cnt); err != nil {
return nil, err
}
if err = os.Remove(stagingTargetPath); err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
klog.Infof("rbd: successfully unmounted volume %s from %s", req.GetVolumeId(), stagingTargetPath)
return &csi.NodeUnstageVolumeResponse{}, nil
}
func (ns *NodeServer) unmount(targetPath, devicePath string, cnt int) error {
var err error
// Bind mounted device needs to be resolved by using resolveBindMountedBlockDevice
if devicePath == "devtmpfs" {
devicePath, err = resolveBindMountedBlockDevice(targetPath)
if err != nil {
return status.Error(codes.Internal, err.Error())
// Any error is critical as Staging path is expected to be empty by Kubernetes, it otherwise
// keeps invoking Unstage. Hence any errors removing files within this path is a critical
// error
if !os.IsNotExist(err) {
klog.Errorf("failed to remove staging target path (%s): (%v)", stagingTargetPath, err)
return nil, status.Error(codes.Internal, err.Error())
}
klog.V(4).Infof("NodeUnpublishVolume: devicePath: %s, (original)cnt: %d\n", devicePath, cnt)
// cnt for GetDeviceNameFromMount is broken for bind mouted device,
// it counts total number of mounted "devtmpfs", instead of counting this device.
// So, forcibly setting cnt to 1 here.
// TODO : fix this properly
cnt = 1
}
klog.V(4).Infof("NodeUnpublishVolume: targetPath: %s, devicePath: %s\n", targetPath, devicePath)
// Unmounting the image
err = ns.mounter.Unmount(targetPath)
imgInfo, err := lookupRBDImageMetadataStash(stagingParentPath)
if err != nil {
klog.V(3).Infof("failed to unmount targetPath: %s with error: %v", targetPath, err)
return status.Error(codes.Internal, err.Error())
}
klog.V(2).Infof("failed to find image metadata: %v", err)
// It is an error if it was mounted, as we should have found the image metadata file with
// no errors
if !notMnt {
return nil, status.Error(codes.Internal, err.Error())
}
cnt--
if cnt != 0 {
// TODO should this be fixed not to success, so that driver can retry unmounting?
return nil
// If not mounted, and error is anything other than metadata file missing, it is an error
if _, ok := err.(ErrMissingStash); !ok {
return nil, status.Error(codes.Internal, err.Error())
}
// It was not mounted and image metadata is also missing, we are done as the last step in
// in the staging transaction is complete
return &csi.NodeUnstageVolumeResponse{}, nil
}
// Unmapping rbd device
if err = detachRBDDevice(devicePath); err != nil {
klog.V(3).Infof("failed to unmap rbd device: %s with error: %v", devicePath, err)
return status.Error(codes.Internal, err.Error())
imageSpec := imgInfo.Pool + "/" + imgInfo.ImageName
if err = detachRBDImageOrDeviceSpec(imageSpec, true, imgInfo.NbdAccess); err != nil {
klog.Errorf("error unmapping volume (%s) from staging path (%s): (%v)",
req.GetVolumeId(), stagingTargetPath, err)
return nil, status.Error(codes.Internal, err.Error())
}
return nil
}
func resolveBindMountedBlockDevice(mountPath string) (string, error) {
// #nosec
cmd := exec.Command("findmnt", "-n", "-o", "SOURCE", "--first-only", "--target", mountPath)
out, err := cmd.CombinedOutput()
if err != nil {
klog.V(2).Infof("Failed findmnt command for path %s: %s %v", mountPath, out, err)
return "", err
}
return parseFindMntResolveSource(string(out))
}
klog.Infof("successfully unmounted volume (%s) from staging path (%s)",
req.GetVolumeId(), stagingTargetPath)
// parse output of "findmnt -o SOURCE --first-only --target" and return just the SOURCE
func parseFindMntResolveSource(out string) (string, error) {
// cut trailing newline
out = strings.TrimSuffix(out, "\n")
// Check if out is a mounted device
reMnt := regexp.MustCompile("^(/[^/]+(?:/[^/]*)*)$")
if match := reMnt.FindStringSubmatch(out); match != nil {
return match[1], nil
if err = cleanupRBDImageMetadataStash(stagingParentPath); err != nil {
klog.Errorf("failed to cleanup image metadata stash (%v)", err)
return nil, status.Error(codes.Internal, err.Error())
}
// Check if out is a block device
// nolint
reBlk := regexp.MustCompile("^devtmpfs\\[(/[^/]+(?:/[^/]*)*)\\]$")
if match := reBlk.FindStringSubmatch(out); match != nil {
return fmt.Sprintf("/dev%s", match[1]), nil
}
return "", fmt.Errorf("parseFindMntResolveSource: %s doesn't match to any expected findMnt output", out)
return &csi.NodeUnstageVolumeResponse{}, nil
}
// NodeGetCapabilities returns the supported capabilities of the node server

View File

@ -37,6 +37,13 @@ const (
accessTypeNbd = "nbd"
rbd = "rbd"
// Output strings returned during invocation of "rbd unmap --device-type... <imageSpec>" when
// image is not found to be mapped. Used to ignore errors when attempting to unmap such images.
// The %s format specifier should contain the <imageSpec> string
// NOTE: When using devicePath instead of imageSpec, the error strings are different
rbdUnmapCmdkRbdMissingMap = "rbd: %s: not a mapped image or snapshot"
rbdUnmapCmdNbdMissingMap = "rbd-nbd: %s is not mapped"
)
var hasNBD = false
@ -195,23 +202,18 @@ func createPath(volOpt *rbdVolume, cr *util.Credentials) (string, error) {
mapOptions := []string{
"--id", cr.ID,
"-m", volOpt.Monitors,
"--keyfile=" + cr.KeyFile}
// Construct map and unmap variants for the command
mapOptions = append(mapOptions, "map", imagePath)
unmapOptions := []string{"unmap", imagePath}
"--keyfile=" + cr.KeyFile,
"map", imagePath,
}
// Choose access protocol
useNBD := false
accessType := accessTypeKRbd
if volOpt.Mounter == rbdTonbd && hasNBD {
accessType = accessTypeNbd
useNBD = true
}
// Update options with device type selection
mapOptions = append(mapOptions, "--device-type", accessType)
unmapOptions = append(unmapOptions, "--device-type", accessType)
// Execute map
output, err := execCommand(rbd, mapOptions)
@ -248,20 +250,38 @@ func waitForrbdImage(backoff wait.Backoff, volOptions *rbdVolume, cr *util.Crede
}
func detachRBDDevice(devicePath string) error {
nbdType := false
if strings.HasPrefix(devicePath, "/dev/nbd") {
nbdType = true
}
return detachRBDImageOrDeviceSpec(devicePath, false, nbdType)
}
// detachRBDImageOrDeviceSpec detaches an rbd imageSpec or devicePath, with additional checking
// when imageSpec is used to decide if image is already unmapped
func detachRBDImageOrDeviceSpec(imageOrDeviceSpec string, isImageSpec, ndbType bool) error {
var err error
var output []byte
klog.V(3).Infof("rbd: unmap device %s", devicePath)
accessType := accessTypeKRbd
if strings.HasPrefix(devicePath, "/dev/nbd") {
if ndbType {
accessType = accessTypeNbd
}
options := []string{"unmap", "--device-type", accessType, devicePath}
options := []string{"unmap", "--device-type", accessType, imageOrDeviceSpec}
output, err = execCommand(rbd, options)
if err != nil {
return fmt.Errorf("rbd: unmap failed %v, rbd output: %s", err, string(output))
// Messages for krbd and nbd differ, hence checking either of them for missing mapping
// This is not applicable when a device path is passed in
if isImageSpec &&
(strings.Contains(string(output), fmt.Sprintf(rbdUnmapCmdkRbdMissingMap, imageOrDeviceSpec)) ||
strings.Contains(string(output), fmt.Sprintf(rbdUnmapCmdNbdMissingMap, imageOrDeviceSpec))) {
// Devices found not to be mapped are treated as a successful detach
klog.Infof("image or device spec (%s) not mapped", imageOrDeviceSpec)
return nil
}
return fmt.Errorf("rbd: unmap for spec (%s) failed (%v): (%s)", imageOrDeviceSpec, err, string(output))
}
return nil

View File

@ -19,7 +19,10 @@ package rbd
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
@ -704,3 +707,76 @@ func getSnapInfo(monitors string, cr *util.Credentials, poolName, imageName, sna
return snpInfo, ErrSnapNotFound{snapName, fmt.Errorf("snap (%s) for image (%s) not found",
snapName, poolName+"/"+imageName)}
}
// rbdImageMetadataStash strongly typed JSON spec for stashed RBD image metadata
type rbdImageMetadataStash struct {
Version int `json:"Version"`
Pool string `json:"pool"`
ImageName string `json:"image"`
NbdAccess bool `json:"accessType"`
}
// file name in which image metadata is stashed
const stashFileName = "image-meta.json"
// stashRBDImageMetadata stashes required fields into the stashFileName at the passed in path, in
// JSON format
func stashRBDImageMetadata(volOptions *rbdVolume, path string) error {
var imgMeta = rbdImageMetadataStash{
Version: 1, // Stash a v1 for now, in case of changes later, there are no checks for this at present
Pool: volOptions.Pool,
ImageName: volOptions.RbdImageName,
}
imgMeta.NbdAccess = false
if volOptions.Mounter == rbdTonbd && hasNBD {
imgMeta.NbdAccess = true
}
encodedBytes, err := json.Marshal(imgMeta)
if err != nil {
return fmt.Errorf("failed to marshall JSON image metadata for image (%s) in pool (%s): (%v)",
volOptions.RbdImageName, volOptions.Pool, err)
}
fPath := filepath.Join(path, stashFileName)
err = ioutil.WriteFile(fPath, encodedBytes, 0600)
if err != nil {
return fmt.Errorf("failed to stash JSON image metadata for image (%s) in pool (%s) at path (%s): (%v)",
volOptions.RbdImageName, volOptions.Pool, fPath, err)
}
return nil
}
// lookupRBDImageMetadataStash reads and returns stashed image metadata at passed in path
func lookupRBDImageMetadataStash(path string) (rbdImageMetadataStash, error) {
var imgMeta rbdImageMetadataStash
fPath := filepath.Join(path, stashFileName)
encodedBytes, err := ioutil.ReadFile(fPath)
if err != nil {
if !os.IsNotExist(err) {
return imgMeta, fmt.Errorf("failed to read stashed JSON image metadata from path (%s): (%v)", fPath, err)
}
return imgMeta, ErrMissingStash{err}
}
err = json.Unmarshal(encodedBytes, &imgMeta)
if err != nil {
return imgMeta, fmt.Errorf("failed to unmarshall stashed JSON image metadata from path (%s): (%v)", fPath, err)
}
return imgMeta, nil
}
// cleanupRBDImageMetadataStash cleans up any stashed metadata at passed in path
func cleanupRBDImageMetadataStash(path string) error {
fPath := filepath.Join(path, stashFileName)
if err := os.Remove(fPath); err != nil {
return fmt.Errorf("failed to cleanup stashed JSON data (%s): (%v)", fPath, err)
}
return nil
}