2018-01-09 18:59:50 +00:00
|
|
|
/*
|
2019-04-03 08:46:15 +00:00
|
|
|
Copyright 2018 The Ceph-CSI Authors.
|
2018-01-09 18:59:50 +00:00
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package rbd
|
|
|
|
|
|
|
|
import (
|
2019-08-24 09:14:15 +00:00
|
|
|
"context"
|
2018-01-09 18:59:50 +00:00
|
|
|
"fmt"
|
|
|
|
|
2020-04-17 09:23:49 +00:00
|
|
|
csicommon "github.com/ceph/ceph-csi/internal/csi-common"
|
|
|
|
"github.com/ceph/ceph-csi/internal/util"
|
2019-02-18 11:30:28 +00:00
|
|
|
|
2019-01-15 16:20:41 +00:00
|
|
|
"github.com/container-storage-interface/spec/lib/go/csi"
|
2019-02-08 07:50:21 +00:00
|
|
|
"github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
|
2018-03-06 22:33:57 +00:00
|
|
|
"google.golang.org/grpc/codes"
|
|
|
|
"google.golang.org/grpc/status"
|
2019-02-04 13:05:07 +00:00
|
|
|
"k8s.io/klog"
|
2018-01-09 18:59:50 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// oneGB is one gibibyte expressed in bytes (2^30); it is used as the
	// default volume size when a create request carries no capacity range.
	oneGB = 1 << 30
)
|
|
|
|
|
2019-01-28 11:47:06 +00:00
|
|
|
// ControllerServer struct of rbd CSI driver with supported methods of CSI
// controller server spec.
type ControllerServer struct {
	*csicommon.DefaultControllerServer
	// MetadataStore is the persisted cache consulted (Get/Delete) when deleting
	// legacy volumes; it may be nil, in which case legacy deletion is rejected.
	MetadataStore util.CachePersister
	// A map storing all volumes with ongoing operations so that additional operations
	// for that same volume (as defined by VolumeID/volume name) return an Aborted error
	VolumeLocks *util.VolumeLocks

	// A map storing all snapshots with ongoing operations so that additional operations
	// for that same snapshot (as defined by SnapshotID/snapshot name) return an Aborted error
	SnapshotLocks *util.VolumeLocks
}
|
|
|
|
|
2019-08-22 16:57:23 +00:00
|
|
|
func (cs *ControllerServer) validateVolumeReq(ctx context.Context, req *csi.CreateVolumeRequest) error {
|
2018-03-06 22:33:57 +00:00
|
|
|
if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_CREATE_DELETE_VOLUME); err != nil {
|
2019-12-16 09:29:04 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "invalid create volume req: %v"), protosanitizer.StripSecrets(req))
|
2019-01-17 05:27:55 +00:00
|
|
|
return err
|
2018-01-09 18:59:50 +00:00
|
|
|
}
|
2018-03-06 22:33:57 +00:00
|
|
|
// Check sanity of request Name, Volume Capabilities
|
2019-06-10 06:48:41 +00:00
|
|
|
if req.Name == "" {
|
2019-05-13 04:47:17 +00:00
|
|
|
return status.Error(codes.InvalidArgument, "volume Name cannot be empty")
|
2018-03-06 22:33:57 +00:00
|
|
|
}
|
|
|
|
if req.VolumeCapabilities == nil {
|
2019-05-13 04:47:17 +00:00
|
|
|
return status.Error(codes.InvalidArgument, "volume Capabilities cannot be empty")
|
2018-03-06 22:33:57 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
options := req.GetParameters()
|
2019-06-10 06:48:41 +00:00
|
|
|
if value, ok := options["clusterID"]; !ok || value == "" {
|
2019-05-13 04:47:17 +00:00
|
|
|
return status.Error(codes.InvalidArgument, "missing or empty cluster ID to provision volume from")
|
2019-04-22 21:35:39 +00:00
|
|
|
}
|
2019-06-10 06:48:41 +00:00
|
|
|
if value, ok := options["pool"]; !ok || value == "" {
|
2019-05-13 04:47:17 +00:00
|
|
|
return status.Error(codes.InvalidArgument, "missing or empty pool name to provision volume from")
|
2019-04-22 21:35:39 +00:00
|
|
|
}
|
2019-09-10 09:56:08 +00:00
|
|
|
|
|
|
|
if value, ok := options["dataPool"]; ok && value == "" {
|
|
|
|
return status.Error(codes.InvalidArgument, "empty datapool name to provision volume from")
|
|
|
|
}
|
2020-02-24 13:19:42 +00:00
|
|
|
if value, ok := options["volumeNamePrefix"]; ok && value == "" {
|
|
|
|
return status.Error(codes.InvalidArgument, "empty volume name prefix to provision volume from")
|
|
|
|
}
|
2019-01-17 05:27:55 +00:00
|
|
|
return nil
|
|
|
|
}
|
2018-01-16 01:52:28 +00:00
|
|
|
|
2019-08-22 16:57:23 +00:00
|
|
|
func (cs *ControllerServer) parseVolCreateRequest(ctx context.Context, req *csi.CreateVolumeRequest) (*rbdVolume, error) {
|
2019-01-29 05:49:16 +00:00
|
|
|
// TODO (sbezverk) Last check for not exceeding total storage capacity
|
|
|
|
|
2019-03-14 00:18:04 +00:00
|
|
|
isMultiNode := false
|
|
|
|
isBlock := false
|
|
|
|
for _, cap := range req.VolumeCapabilities {
|
2020-01-24 16:26:56 +00:00
|
|
|
// RO modes need to be handled independently (ie right now even if access mode is RO, they'll be RW upon attach)
|
2019-03-18 16:08:24 +00:00
|
|
|
if cap.GetAccessMode().GetMode() == csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER {
|
2019-03-14 00:18:04 +00:00
|
|
|
isMultiNode = true
|
|
|
|
}
|
|
|
|
if cap.GetBlock() != nil {
|
|
|
|
isBlock = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We want to fail early if the user is trying to create a RWX on a non-block type device
|
|
|
|
if isMultiNode && !isBlock {
|
|
|
|
return nil, status.Error(codes.InvalidArgument, "multi node access modes are only supported on rbd `block` type volumes")
|
|
|
|
}
|
|
|
|
|
|
|
|
// if it's NOT SINGLE_NODE_WRITER and it's BLOCK we'll set the parameter to ignore the in-use checks
|
2020-01-29 11:44:45 +00:00
|
|
|
rbdVol, err := genVolFromVolumeOptions(ctx, req.GetParameters(), req.GetSecrets(), (isMultiNode && isBlock), false)
|
2019-01-29 05:49:16 +00:00
|
|
|
if err != nil {
|
2019-03-07 12:56:47 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, err.Error())
|
2019-01-29 05:49:16 +00:00
|
|
|
}
|
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
rbdVol.RequestName = req.GetName()
|
|
|
|
|
2019-01-29 05:49:16 +00:00
|
|
|
// Volume Size - Default is 1 GiB
|
|
|
|
volSizeBytes := int64(oneGB)
|
|
|
|
if req.GetCapacityRange() != nil {
|
|
|
|
volSizeBytes = req.GetCapacityRange().GetRequiredBytes()
|
|
|
|
}
|
2019-03-01 12:08:17 +00:00
|
|
|
|
2019-09-25 08:35:33 +00:00
|
|
|
// always round up the request size in bytes to the nearest MiB/GiB
|
|
|
|
rbdVol.VolSize = util.RoundOffBytes(volSizeBytes)
|
2019-01-29 05:49:16 +00:00
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// start with pool the same as journal pool, in case there is a topology
|
|
|
|
// based split, pool for the image will be updated subsequently
|
|
|
|
rbdVol.JournalPool = rbdVol.Pool
|
|
|
|
|
|
|
|
// store topology information from the request
|
|
|
|
rbdVol.TopologyPools, rbdVol.TopologyRequirement, err = util.GetTopologyFromRequest(req)
|
|
|
|
if err != nil {
|
|
|
|
return nil, status.Error(codes.InvalidArgument, err.Error())
|
|
|
|
}
|
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// NOTE: rbdVol does not contain VolID and RbdImageName populated, everything
|
|
|
|
// else is populated post create request parsing
|
2019-01-29 05:49:16 +00:00
|
|
|
return rbdVol, nil
|
|
|
|
}
|
|
|
|
|
2020-04-21 07:52:26 +00:00
|
|
|
func buildCreateVolumeResponse(ctx context.Context, req *csi.CreateVolumeRequest, rbdVol *rbdVolume) (*csi.CreateVolumeResponse, error) {
|
|
|
|
if rbdVol.Encrypted {
|
|
|
|
err := rbdVol.ensureEncryptionMetadataSet(rbdImageRequiresEncryption)
|
|
|
|
if err != nil {
|
|
|
|
klog.Error(util.Log(ctx, err.Error()))
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
volumeContext := req.GetParameters()
|
|
|
|
volumeContext["pool"] = rbdVol.Pool
|
|
|
|
volumeContext["journalPool"] = rbdVol.JournalPool
|
|
|
|
volumeContext["imageName"] = rbdVol.RbdImageName
|
|
|
|
volume := &csi.Volume{
|
|
|
|
VolumeId: rbdVol.VolID,
|
|
|
|
CapacityBytes: rbdVol.VolSize,
|
|
|
|
VolumeContext: volumeContext,
|
|
|
|
ContentSource: req.GetVolumeContentSource(),
|
|
|
|
}
|
|
|
|
if rbdVol.Topology != nil {
|
|
|
|
volume.AccessibleTopology =
|
|
|
|
[]*csi.Topology{
|
|
|
|
{
|
|
|
|
Segments: rbdVol.Topology,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &csi.CreateVolumeResponse{Volume: volume}, nil
|
|
|
|
}
|
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// CreateVolume creates the volume in backend
|
2019-01-17 07:51:06 +00:00
|
|
|
func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error) {
|
2019-08-22 16:57:23 +00:00
|
|
|
if err := cs.validateVolumeReq(ctx, req); err != nil {
|
2019-01-17 05:27:55 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
2019-06-01 21:26:42 +00:00
|
|
|
|
2020-01-07 13:45:52 +00:00
|
|
|
// TODO: create/get a connection from the the ConnPool, and do not pass
|
|
|
|
// the credentials to any of the utility functions.
|
2019-06-25 19:29:17 +00:00
|
|
|
cr, err := util.NewUserCredentials(req.GetSecrets())
|
2019-06-01 21:26:42 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
2019-06-25 19:29:17 +00:00
|
|
|
defer cr.DeleteCredentials()
|
2019-06-01 21:26:42 +00:00
|
|
|
|
2019-08-22 16:57:23 +00:00
|
|
|
rbdVol, err := cs.parseVolCreateRequest(ctx, req)
|
2019-04-22 21:35:39 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-01-07 13:45:52 +00:00
|
|
|
defer rbdVol.Destroy()
|
2019-02-14 19:31:26 +00:00
|
|
|
|
2019-09-12 04:53:37 +00:00
|
|
|
// Existence and conflict checks
|
|
|
|
if acquired := cs.VolumeLocks.TryAcquire(req.GetName()); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.VolumeOperationAlreadyExistsFmt), req.GetName())
|
2019-09-12 04:53:37 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, req.GetName())
|
|
|
|
}
|
|
|
|
defer cs.VolumeLocks.Release(req.GetName())
|
Move locks to more granular locking than CPU count based
As detailed in issue #279, current lock scheme has hash
buckets that are count of CPUs. This causes a lot of contention
when parallel requests are made to the CSI plugin. To reduce
lock contention, this commit introduces granular locks per
identifier.
The commit also changes the timeout for gRPC requests to Create
and Delete volumes, as the current timeout is 10s (kubernetes
documentation says 15s but code defaults are 10s). A virtual
setup takes about 12-15s to complete a request at times, that leads
to unwanted retries of the same request, hence the increased
timeout to enable operation completion with minimal retries.
Tests to create PVCs before and after these changes look like so,
Before:
Default master code + sidecar provisioner --timeout option set
to 30 seconds
20 PVCs
Creation: 3 runs, 396/391/400 seconds
Deletion: 3 runs, 218/271/118 seconds
- Once was stalled for more than 8 minutes and cancelled the run
After:
Current commit + sidecar provisioner --timeout option set to 30 sec
20 PVCs
Creation: 3 runs, 42/59/65 seconds
Deletion: 3 runs, 32/32/31 seconds
Fixes: #279
Signed-off-by: ShyamsundarR <srangana@redhat.com>
2019-06-22 16:43:28 +00:00
|
|
|
|
2020-03-17 12:57:01 +00:00
|
|
|
err = rbdVol.Connect(cr)
|
|
|
|
if err != nil {
|
|
|
|
klog.Errorf(util.Log(ctx, "failed to connect to volume %v: %v"), rbdVol.RbdImageName, err)
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
found, err := rbdVol.Exists(ctx)
|
2019-04-22 21:35:39 +00:00
|
|
|
if err != nil {
|
|
|
|
if _, ok := err.(ErrVolNameConflict); ok {
|
|
|
|
return nil, status.Error(codes.AlreadyExists, err.Error())
|
2018-03-06 22:33:57 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
if found {
|
2020-04-21 07:52:26 +00:00
|
|
|
return buildCreateVolumeResponse(ctx, req, rbdVol)
|
2018-03-06 22:33:57 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
rbdSnap, err := cs.checkSnapshotSource(ctx, req, cr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = reserveVol(ctx, rbdVol, rbdSnap, cr)
|
2018-01-16 01:52:28 +00:00
|
|
|
if err != nil {
|
2019-04-22 21:35:39 +00:00
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
2018-01-16 01:52:28 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
2019-08-22 17:19:06 +00:00
|
|
|
errDefer := undoVolReservation(ctx, rbdVol, cr)
|
2019-04-22 21:35:39 +00:00
|
|
|
if errDefer != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.Warningf(util.Log(ctx, "failed undoing reservation of volume: %s (%s)"), req.GetName(), errDefer)
|
2019-04-22 21:35:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
2018-01-16 01:52:28 +00:00
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
err = createBackingImage(ctx, cr, rbdVol, rbdSnap)
|
2019-01-28 19:55:10 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2018-01-09 18:59:50 +00:00
|
|
|
}
|
2019-02-14 19:31:26 +00:00
|
|
|
|
2019-12-13 11:41:32 +00:00
|
|
|
if rbdVol.Encrypted {
|
2020-01-09 10:31:07 +00:00
|
|
|
err = rbdVol.ensureEncryptionMetadataSet(rbdImageRequiresEncryption)
|
2019-12-13 11:41:32 +00:00
|
|
|
if err != nil {
|
2020-05-28 17:08:25 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to save encryption status, deleting image %s: %s"),
|
|
|
|
rbdVol.RbdImageName, err)
|
|
|
|
if deleteErr := deleteImage(ctx, rbdVol, cr); deleteErr != nil {
|
2019-12-13 11:41:32 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to delete rbd image: %s/%s with error: %v"),
|
|
|
|
rbdVol.Pool, rbdVol.RbdImageName, deleteErr)
|
|
|
|
return nil, deleteErr
|
|
|
|
}
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
volumeContext := req.GetParameters()
|
|
|
|
volumeContext["pool"] = rbdVol.Pool
|
|
|
|
volumeContext["journalPool"] = rbdVol.JournalPool
|
2020-04-18 08:57:05 +00:00
|
|
|
volumeContext["imageName"] = rbdVol.RbdImageName
|
2020-01-24 16:26:56 +00:00
|
|
|
volume := &csi.Volume{
|
|
|
|
VolumeId: rbdVol.VolID,
|
|
|
|
CapacityBytes: rbdVol.VolSize,
|
|
|
|
VolumeContext: volumeContext,
|
|
|
|
ContentSource: req.GetVolumeContentSource(),
|
|
|
|
}
|
|
|
|
if rbdVol.Topology != nil {
|
|
|
|
volume.AccessibleTopology =
|
|
|
|
[]*csi.Topology{
|
|
|
|
{
|
|
|
|
Segments: rbdVol.Topology,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &csi.CreateVolumeResponse{Volume: volume}, nil
|
2018-01-09 18:59:50 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
func createBackingImage(ctx context.Context, cr *util.Credentials, rbdVol *rbdVolume, rbdSnap *rbdSnapshot) error {
|
2019-01-28 19:55:10 +00:00
|
|
|
var err error
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
if rbdSnap != nil {
|
|
|
|
err = restoreSnapshot(ctx, rbdVol, rbdSnap, cr)
|
2019-06-01 21:26:42 +00:00
|
|
|
if err != nil {
|
2020-01-24 16:26:56 +00:00
|
|
|
return err
|
2019-06-01 21:26:42 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
klog.V(4).Infof(util.Log(ctx, "created volume %s from snapshot %s"), rbdVol.RequestName, rbdSnap.RbdSnapName)
|
|
|
|
return nil
|
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
err = createImage(ctx, rbdVol, cr)
|
|
|
|
if err != nil {
|
|
|
|
klog.Errorf(util.Log(ctx, "failed to create volume: %v"), err)
|
|
|
|
return status.Error(codes.Internal, err.Error())
|
2019-01-28 19:55:10 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
klog.V(4).Infof(util.Log(ctx, "created volume %s backed by image %s"), rbdVol.RequestName, rbdVol.RbdImageName)
|
|
|
|
|
2019-01-28 19:55:10 +00:00
|
|
|
return nil
|
|
|
|
}
|
2019-12-13 11:41:32 +00:00
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
func (cs *ControllerServer) checkSnapshotSource(ctx context.Context, req *csi.CreateVolumeRequest,
|
|
|
|
cr *util.Credentials) (*rbdSnapshot, error) {
|
|
|
|
if req.VolumeContentSource == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
2019-01-17 05:27:55 +00:00
|
|
|
snapshot := req.VolumeContentSource.GetSnapshot()
|
|
|
|
if snapshot == nil {
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, "volume Snapshot cannot be empty")
|
2019-01-17 05:27:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
snapshotID := snapshot.GetSnapshotId()
|
2019-06-10 06:48:41 +00:00
|
|
|
if snapshotID == "" {
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, "volume Snapshot ID cannot be empty")
|
2019-01-17 05:27:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
rbdSnap := &rbdSnapshot{}
|
2020-01-24 16:26:56 +00:00
|
|
|
if err := genSnapFromSnapID(ctx, rbdSnap, snapshotID, cr); err != nil {
|
2019-04-22 21:35:39 +00:00
|
|
|
if _, ok := err.(ErrSnapNotFound); !ok {
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
2019-04-22 21:35:39 +00:00
|
|
|
}
|
2020-01-31 08:49:11 +00:00
|
|
|
|
|
|
|
if _, ok := err.(util.ErrPoolNotFound); ok {
|
|
|
|
klog.Errorf(util.Log(ctx, "failed to get backend snapshot for %s: %v"), snapshotID, err)
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, err.Error())
|
2020-01-31 08:49:11 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, "missing requested Snapshot ID")
|
2019-01-17 05:27:55 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
return rbdSnap, nil
|
2019-01-17 05:27:55 +00:00
|
|
|
}
|
2019-01-17 06:49:35 +00:00
|
|
|
|
2019-05-31 18:09:24 +00:00
|
|
|
// DeleteLegacyVolume deletes a volume provisioned using version 1.0.0 of the plugin
|
2019-08-22 16:57:23 +00:00
|
|
|
func (cs *ControllerServer) DeleteLegacyVolume(ctx context.Context, req *csi.DeleteVolumeRequest, cr *util.Credentials) (*csi.DeleteVolumeResponse, error) {
|
2019-05-31 18:09:24 +00:00
|
|
|
volumeID := req.GetVolumeId()
|
|
|
|
|
|
|
|
if cs.MetadataStore == nil {
|
|
|
|
return nil, status.Errorf(codes.InvalidArgument, "missing metadata store configuration to"+
|
|
|
|
" proceed with deleting legacy volume ID (%s)", volumeID)
|
|
|
|
}
|
|
|
|
|
2019-09-12 04:53:37 +00:00
|
|
|
if acquired := cs.VolumeLocks.TryAcquire(volumeID); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.VolumeOperationAlreadyExistsFmt), volumeID)
|
2019-09-12 04:53:37 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, volumeID)
|
|
|
|
}
|
|
|
|
defer cs.VolumeLocks.Release(volumeID)
|
2019-05-31 18:09:24 +00:00
|
|
|
|
|
|
|
rbdVol := &rbdVolume{}
|
2020-01-07 13:45:52 +00:00
|
|
|
defer rbdVol.Destroy()
|
2019-05-31 18:09:24 +00:00
|
|
|
if err := cs.MetadataStore.Get(volumeID, rbdVol); err != nil {
|
|
|
|
if err, ok := err.(*util.CacheEntryNotFound); ok {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Warningf(util.Log(ctx, "metadata for legacy volume %s not found, assuming the volume to be already deleted (%v)"), volumeID, err)
|
2019-05-31 18:09:24 +00:00
|
|
|
return &csi.DeleteVolumeResponse{}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fill up Monitors
|
|
|
|
if err := updateMons(rbdVol, nil, req.GetSecrets()); err != nil {
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update rbdImageName as the VolName when dealing with version 1 volumes
|
|
|
|
rbdVol.RbdImageName = rbdVol.VolName
|
|
|
|
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.V(4).Infof(util.Log(ctx, "deleting legacy volume %s"), rbdVol.VolName)
|
|
|
|
if err := deleteImage(ctx, rbdVol, cr); err != nil {
|
2019-05-31 18:09:24 +00:00
|
|
|
// TODO: can we detect "already deleted" situations here and proceed?
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to delete legacy rbd image: %s/%s with error: %v"), rbdVol.Pool, rbdVol.VolName, err)
|
2019-05-31 18:09:24 +00:00
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := cs.MetadataStore.Delete(volumeID); err != nil {
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
return &csi.DeleteVolumeResponse{}, nil
|
|
|
|
}
|
|
|
|
|
2019-01-28 11:47:06 +00:00
|
|
|
// DeleteVolume deletes the volume in backend and removes the volume metadata
|
|
|
|
// from store
|
2019-01-17 07:51:06 +00:00
|
|
|
func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) (*csi.DeleteVolumeResponse, error) {
|
2018-03-06 22:33:57 +00:00
|
|
|
if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_CREATE_DELETE_VOLUME); err != nil {
|
2019-12-16 09:29:04 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "invalid delete volume req: %v"), protosanitizer.StripSecrets(req))
|
2018-01-09 18:59:50 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
2019-06-01 21:26:42 +00:00
|
|
|
|
2019-06-25 19:29:17 +00:00
|
|
|
cr, err := util.NewUserCredentials(req.GetSecrets())
|
2019-06-01 21:26:42 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
2019-06-25 19:29:17 +00:00
|
|
|
defer cr.DeleteCredentials()
|
2019-06-01 21:26:42 +00:00
|
|
|
|
2018-01-09 18:59:50 +00:00
|
|
|
// For now the image get unconditionally deleted, but here retention policy can be checked
|
2018-01-16 01:52:28 +00:00
|
|
|
volumeID := req.GetVolumeId()
|
2019-04-22 21:35:39 +00:00
|
|
|
if volumeID == "" {
|
2019-05-13 04:47:17 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, "empty volume ID in request")
|
2019-04-22 21:35:39 +00:00
|
|
|
}
|
2019-01-28 13:59:16 +00:00
|
|
|
|
2019-09-12 04:53:37 +00:00
|
|
|
if acquired := cs.VolumeLocks.TryAcquire(volumeID); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.VolumeOperationAlreadyExistsFmt), volumeID)
|
2019-09-12 04:53:37 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, volumeID)
|
|
|
|
}
|
|
|
|
defer cs.VolumeLocks.Release(volumeID)
|
|
|
|
|
2020-03-17 13:39:35 +00:00
|
|
|
rbdVol, err := genVolFromVolID(ctx, volumeID, cr, req.GetSecrets())
|
|
|
|
if err != nil {
|
2020-05-11 08:48:52 +00:00
|
|
|
switch err.(type) {
|
|
|
|
case util.ErrPoolNotFound:
|
2020-01-31 08:49:11 +00:00
|
|
|
klog.Warningf(util.Log(ctx, "failed to get backend volume for %s: %v"), volumeID, err)
|
|
|
|
return &csi.DeleteVolumeResponse{}, nil
|
|
|
|
|
2019-05-31 18:09:24 +00:00
|
|
|
// If error is ErrInvalidVolID it could be a version 1.0.0 or lower volume, attempt
|
|
|
|
// to process it as such
|
2020-05-11 08:48:52 +00:00
|
|
|
case ErrInvalidVolID:
|
2019-05-31 18:09:24 +00:00
|
|
|
if isLegacyVolumeID(volumeID) {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.V(2).Infof(util.Log(ctx, "attempting deletion of potential legacy volume (%s)"), volumeID)
|
|
|
|
return cs.DeleteLegacyVolume(ctx, req, cr)
|
2019-05-31 18:09:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Consider unknown volumeID as a successfully deleted volume
|
|
|
|
return &csi.DeleteVolumeResponse{}, nil
|
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// if error is ErrKeyNotFound, then a previous attempt at deletion was complete
|
|
|
|
// or partially complete (image and imageOMap are garbage collected already), hence return
|
|
|
|
// success as deletion is complete
|
2020-05-11 08:48:52 +00:00
|
|
|
case util.ErrKeyNotFound:
|
2020-01-31 08:49:11 +00:00
|
|
|
klog.Warningf(util.Log(ctx, "Failed to volume options for %s: %v"), volumeID, err)
|
2018-10-09 10:08:56 +00:00
|
|
|
return &csi.DeleteVolumeResponse{}, nil
|
2019-02-25 17:09:21 +00:00
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// All errors other than ErrImageNotFound should return an error back to the caller
|
2020-05-11 08:48:52 +00:00
|
|
|
case ErrImageNotFound:
|
|
|
|
break
|
|
|
|
default:
|
2019-04-22 21:35:39 +00:00
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
2019-02-18 08:22:52 +00:00
|
|
|
}
|
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// If error is ErrImageNotFound then we failed to find the image, but found the imageOMap
|
|
|
|
// to lead us to the image, hence the imageOMap needs to be garbage collected, by calling
|
|
|
|
// unreserve for the same
|
2019-09-12 04:53:37 +00:00
|
|
|
if acquired := cs.VolumeLocks.TryAcquire(rbdVol.RequestName); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.VolumeOperationAlreadyExistsFmt), rbdVol.RequestName)
|
2019-09-12 04:53:37 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, rbdVol.RequestName)
|
|
|
|
}
|
|
|
|
defer cs.VolumeLocks.Release(rbdVol.RequestName)
|
2019-02-18 08:22:52 +00:00
|
|
|
|
2020-01-29 11:44:45 +00:00
|
|
|
if err = undoVolReservation(ctx, rbdVol, cr); err != nil {
|
2019-04-22 21:35:39 +00:00
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
return &csi.DeleteVolumeResponse{}, nil
|
2019-02-26 13:19:00 +00:00
|
|
|
}
|
2020-03-17 13:39:35 +00:00
|
|
|
defer rbdVol.Destroy()
|
2019-02-26 13:19:00 +00:00
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// lock out parallel create requests against the same volume name as we
|
|
|
|
// cleanup the image and associated omaps for the same
|
2019-09-12 04:53:37 +00:00
|
|
|
if acquired := cs.VolumeLocks.TryAcquire(rbdVol.RequestName); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.VolumeOperationAlreadyExistsFmt), rbdVol.RequestName)
|
2019-09-12 04:53:37 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, rbdVol.RequestName)
|
|
|
|
}
|
|
|
|
defer cs.VolumeLocks.Release(rbdVol.RequestName)
|
2019-02-18 08:22:52 +00:00
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// Deleting rbd image
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.V(4).Infof(util.Log(ctx, "deleting image %s"), rbdVol.RbdImageName)
|
2020-01-29 11:44:45 +00:00
|
|
|
if err = deleteImage(ctx, rbdVol, cr); err != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to delete rbd image: %s/%s with error: %v"),
|
2019-04-22 21:35:39 +00:00
|
|
|
rbdVol.Pool, rbdVol.RbdImageName, err)
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
2019-02-18 08:22:52 +00:00
|
|
|
}
|
|
|
|
|
2020-01-29 11:44:45 +00:00
|
|
|
if err = undoVolReservation(ctx, rbdVol, cr); err != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to remove reservation for volume (%s) with backing image (%s) (%s)"),
|
2019-05-31 18:09:24 +00:00
|
|
|
rbdVol.RequestName, rbdVol.RbdImageName, err)
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
|
2020-01-29 11:44:45 +00:00
|
|
|
if rbdVol.Encrypted {
|
|
|
|
if err = rbdVol.KMS.DeletePassphrase(rbdVol.VolID); err != nil {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Warningf(util.Log(ctx, "failed to clean the passphrase for volume %s: %s"), rbdVol.VolID, err)
|
2020-01-29 11:44:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
return &csi.DeleteVolumeResponse{}, nil
|
2019-02-18 08:22:52 +00:00
|
|
|
}
|
|
|
|
|
2019-01-28 11:47:06 +00:00
|
|
|
// ValidateVolumeCapabilities checks whether the volume capabilities requested
|
|
|
|
// are supported.
|
2019-01-17 07:51:06 +00:00
|
|
|
func (cs *ControllerServer) ValidateVolumeCapabilities(ctx context.Context, req *csi.ValidateVolumeCapabilitiesRequest) (*csi.ValidateVolumeCapabilitiesResponse, error) {
|
2019-04-22 21:35:39 +00:00
|
|
|
if req.GetVolumeId() == "" {
|
2019-05-13 04:47:17 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, "empty volume ID in request")
|
2019-04-22 21:35:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if len(req.VolumeCapabilities) == 0 {
|
2019-05-13 04:47:17 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, "empty volume capabilities in request")
|
2019-04-22 21:35:39 +00:00
|
|
|
}
|
|
|
|
|
2019-03-13 18:19:14 +00:00
|
|
|
for _, cap := range req.VolumeCapabilities {
|
|
|
|
if cap.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
|
|
|
|
return &csi.ValidateVolumeCapabilitiesResponse{Message: ""}, nil
|
2018-01-18 19:13:08 +00:00
|
|
|
}
|
|
|
|
}
|
2018-11-24 19:18:24 +00:00
|
|
|
return &csi.ValidateVolumeCapabilitiesResponse{
|
|
|
|
Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{
|
|
|
|
VolumeCapabilities: req.VolumeCapabilities,
|
|
|
|
},
|
|
|
|
}, nil
|
2018-01-18 19:13:08 +00:00
|
|
|
}
|
|
|
|
|
2019-01-28 11:47:06 +00:00
|
|
|
// CreateSnapshot creates the snapshot in backend and stores metadata
|
|
|
|
// in store
|
2019-02-14 21:25:32 +00:00
|
|
|
// nolint: gocyclo
|
2019-01-17 07:51:06 +00:00
|
|
|
func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequest) (*csi.CreateSnapshotResponse, error) {
|
2019-08-22 16:57:23 +00:00
|
|
|
if err := cs.validateSnapshotReq(ctx, req); err != nil {
|
2019-01-28 19:55:10 +00:00
|
|
|
return nil, err
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
2019-01-28 13:59:16 +00:00
|
|
|
|
2019-06-25 19:29:17 +00:00
|
|
|
cr, err := util.NewUserCredentials(req.GetSecrets())
|
2019-06-01 21:26:42 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
2019-06-25 19:29:17 +00:00
|
|
|
defer cr.DeleteCredentials()
|
2019-06-01 21:26:42 +00:00
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// Fetch source volume information
|
2020-03-17 13:39:35 +00:00
|
|
|
rbdVol, err := genVolFromVolID(ctx, req.GetSourceVolumeId(), cr, req.GetSecrets())
|
2019-04-22 21:35:39 +00:00
|
|
|
if err != nil {
|
2020-05-11 08:48:52 +00:00
|
|
|
switch err.(type) {
|
|
|
|
case ErrImageNotFound:
|
|
|
|
err = status.Errorf(codes.NotFound, "source Volume ID %s not found", req.GetSourceVolumeId())
|
|
|
|
case util.ErrPoolNotFound:
|
2020-01-31 08:49:11 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to get backend volume for %s: %v"), req.GetSourceVolumeId(), err)
|
2020-05-11 08:48:52 +00:00
|
|
|
err = status.Errorf(codes.NotFound, err.Error())
|
|
|
|
default:
|
|
|
|
err = status.Errorf(codes.Internal, err.Error())
|
2020-01-31 08:49:11 +00:00
|
|
|
}
|
2020-05-11 08:48:52 +00:00
|
|
|
return nil, err
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
2020-03-17 13:39:35 +00:00
|
|
|
defer rbdVol.Destroy()
|
2018-08-08 05:42:17 +00:00
|
|
|
|
2020-01-29 11:44:45 +00:00
|
|
|
// TODO: re-encrypt snapshot with a new passphrase
|
|
|
|
if rbdVol.Encrypted {
|
|
|
|
return nil, status.Errorf(codes.Unimplemented, "source Volume %s is encrypted, "+
|
|
|
|
"snapshotting is not supported currently", rbdVol.VolID)
|
|
|
|
}
|
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// Check if source volume was created with required image features for snaps
|
|
|
|
if !hasSnapshotFeature(rbdVol.ImageFeatures) {
|
|
|
|
return nil, status.Errorf(codes.InvalidArgument, "volume(%s) has not snapshot feature(layering)", req.GetSourceVolumeId())
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// Create snap volume
|
2019-08-22 16:57:23 +00:00
|
|
|
rbdSnap := genSnapFromOptions(ctx, rbdVol, req.GetParameters())
|
2019-04-22 21:35:39 +00:00
|
|
|
rbdSnap.RbdImageName = rbdVol.RbdImageName
|
|
|
|
rbdSnap.SizeBytes = rbdVol.VolSize
|
|
|
|
rbdSnap.SourceVolumeID = req.GetSourceVolumeId()
|
|
|
|
rbdSnap.RequestName = req.GetName()
|
|
|
|
|
2019-09-12 04:53:37 +00:00
|
|
|
if acquired := cs.SnapshotLocks.TryAcquire(req.GetName()); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.SnapshotOperationAlreadyExistsFmt), req.GetName())
|
2019-09-12 04:53:37 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, req.GetName())
|
|
|
|
}
|
|
|
|
defer cs.SnapshotLocks.Release(req.GetName())
|
Move locks to more granular locking than CPU count based
As detailed in issue #279, current lock scheme has hash
buckets that are count of CPUs. This causes a lot of contention
when parallel requests are made to the CSI plugin. To reduce
lock contention, this commit introduces granular locks per
identifier.
The commit also changes the timeout for gRPC requests to Create
and Delete volumes, as the current timeout is 10s (kubernetes
documentation says 15s but code defaults are 10s). A virtual
setup takes about 12-15s to complete a request at times, that leads
to unwanted retries of the same request, hence the increased
timeout to enable operation completion with minimal retries.
Tests to create PVCs before and after these changes look like so,
Before:
Default master code + sidecar provisioner --timeout option set
to 30 seconds
20 PVCs
Creation: 3 runs, 396/391/400 seconds
Deletion: 3 runs, 218/271/118 seconds
- Once was stalled for more than 8 minutes and cancelled the run
After:
Current commit + sidecar provisioner --timeout option set to 30 sec
20 PVCs
Creation: 3 runs, 42/59/65 seconds
Deletion: 3 runs, 32/32/31 seconds
Fixes: #279
Signed-off-by: ShyamsundarR <srangana@redhat.com>
2019-06-22 16:43:28 +00:00
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
// Need to check for already existing snapshot name, and if found
|
|
|
|
// check for the requested source volume id and already allocated source volume id
|
2019-08-22 16:57:23 +00:00
|
|
|
found, err := checkSnapExists(ctx, rbdSnap, cr)
|
2018-08-08 05:42:17 +00:00
|
|
|
if err != nil {
|
2019-05-14 19:15:01 +00:00
|
|
|
if _, ok := err.(util.ErrSnapNameConflict); ok {
|
2019-04-22 21:35:39 +00:00
|
|
|
return nil, status.Error(codes.AlreadyExists, err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, status.Errorf(codes.Internal, err.Error())
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
if found {
|
|
|
|
return &csi.CreateSnapshotResponse{
|
|
|
|
Snapshot: &csi.Snapshot{
|
|
|
|
SizeBytes: rbdSnap.SizeBytes,
|
|
|
|
SnapshotId: rbdSnap.SnapID,
|
|
|
|
SourceVolumeId: rbdSnap.SourceVolumeID,
|
|
|
|
CreationTime: rbdSnap.CreatedAt,
|
|
|
|
ReadyToUse: true,
|
|
|
|
},
|
|
|
|
}, nil
|
2018-08-09 13:07:13 +00:00
|
|
|
}
|
|
|
|
|
2019-08-22 16:57:23 +00:00
|
|
|
err = reserveSnap(ctx, rbdSnap, cr)
|
2019-01-28 19:55:10 +00:00
|
|
|
if err != nil {
|
2019-03-07 12:56:47 +00:00
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
2019-01-28 19:55:10 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
2019-08-22 17:19:06 +00:00
|
|
|
errDefer := undoSnapReservation(ctx, rbdSnap, cr)
|
2019-04-22 21:35:39 +00:00
|
|
|
if errDefer != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.Warningf(util.Log(ctx, "failed undoing reservation of snapshot: %s %v"), req.GetName(), errDefer)
|
2019-04-22 21:35:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
2019-01-28 19:55:10 +00:00
|
|
|
|
2019-08-22 16:57:23 +00:00
|
|
|
err = cs.doSnapshot(ctx, rbdSnap, cr)
|
2019-04-22 21:35:39 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2019-01-28 19:55:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return &csi.CreateSnapshotResponse{
|
|
|
|
Snapshot: &csi.Snapshot{
|
|
|
|
SizeBytes: rbdSnap.SizeBytes,
|
2019-04-22 21:35:39 +00:00
|
|
|
SnapshotId: rbdSnap.SnapID,
|
2019-01-28 19:55:10 +00:00
|
|
|
SourceVolumeId: req.GetSourceVolumeId(),
|
2019-04-22 21:35:39 +00:00
|
|
|
CreationTime: rbdSnap.CreatedAt,
|
|
|
|
ReadyToUse: true,
|
2019-01-28 19:55:10 +00:00
|
|
|
},
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2019-08-22 16:57:23 +00:00
|
|
|
func (cs *ControllerServer) validateSnapshotReq(ctx context.Context, req *csi.CreateSnapshotRequest) error {
|
2019-01-28 19:55:10 +00:00
|
|
|
if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT); err != nil {
|
2019-12-16 09:29:04 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "invalid create snapshot req: %v"), protosanitizer.StripSecrets(req))
|
2019-01-28 19:55:10 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check sanity of request Snapshot Name, Source Volume Id
|
2019-06-10 06:48:41 +00:00
|
|
|
if req.Name == "" {
|
2019-05-13 04:47:17 +00:00
|
|
|
return status.Error(codes.InvalidArgument, "snapshot Name cannot be empty")
|
2019-01-28 19:55:10 +00:00
|
|
|
}
|
2019-06-10 06:48:41 +00:00
|
|
|
if req.SourceVolumeId == "" {
|
2019-05-13 04:47:17 +00:00
|
|
|
return status.Error(codes.InvalidArgument, "source Volume ID cannot be empty")
|
2019-01-28 19:55:10 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
|
2020-02-24 13:19:42 +00:00
|
|
|
options := req.GetParameters()
|
|
|
|
if value, ok := options["snapshotNamePrefix"]; ok && value == "" {
|
|
|
|
return status.Error(codes.InvalidArgument, "empty snapshot name prefix to provision snapshot from")
|
|
|
|
}
|
|
|
|
|
2019-01-28 19:55:10 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-08-22 16:57:23 +00:00
|
|
|
func (cs *ControllerServer) doSnapshot(ctx context.Context, rbdSnap *rbdSnapshot, cr *util.Credentials) (err error) {
|
|
|
|
err = createSnapshot(ctx, rbdSnap, cr)
|
2019-04-22 21:35:39 +00:00
|
|
|
// If snap creation fails, even due to snapname already used, fail, next attempt will get a new
|
|
|
|
// uuid for use as the snap name
|
2018-08-08 05:42:17 +00:00
|
|
|
if err != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to create snapshot: %v"), err)
|
2019-04-22 21:35:39 +00:00
|
|
|
return status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
errDefer := deleteSnapshot(ctx, rbdSnap, cr)
|
2019-04-22 21:35:39 +00:00
|
|
|
if errDefer != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to delete snapshot: %v"), errDefer)
|
2019-04-22 21:35:39 +00:00
|
|
|
err = fmt.Errorf("snapshot created but failed to delete snapshot due to"+
|
|
|
|
" other failures: %v", err)
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
err = status.Error(codes.Internal, err.Error())
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
}()
|
2019-08-22 16:57:23 +00:00
|
|
|
err = protectSnapshot(ctx, rbdSnap, cr)
|
2019-04-22 21:35:39 +00:00
|
|
|
if err != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to protect snapshot: %v"), err)
|
2019-04-22 21:35:39 +00:00
|
|
|
return status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
defer func() {
|
2018-08-08 05:42:17 +00:00
|
|
|
if err != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
errDefer := unprotectSnapshot(ctx, rbdSnap, cr)
|
2019-04-22 21:35:39 +00:00
|
|
|
if errDefer != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to unprotect snapshot: %v"), errDefer)
|
2019-04-22 21:35:39 +00:00
|
|
|
err = fmt.Errorf("snapshot created but failed to unprotect snapshot due to"+
|
|
|
|
" other failures: %v", err)
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
err = status.Error(codes.Internal, err.Error())
|
2018-08-09 13:07:06 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
}()
|
|
|
|
|
2019-08-22 16:57:23 +00:00
|
|
|
err = getSnapshotMetadata(ctx, rbdSnap, cr)
|
2019-04-22 21:35:39 +00:00
|
|
|
if err != nil {
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to fetch snapshot metadata: %v"), err)
|
2019-04-22 21:35:39 +00:00
|
|
|
return status.Error(codes.Internal, err.Error())
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
2019-04-22 21:35:39 +00:00
|
|
|
|
2019-01-28 19:55:10 +00:00
|
|
|
return nil
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
|
|
|
|
2019-01-28 11:47:06 +00:00
|
|
|
// DeleteSnapshot deletes the snapshot in backend and removes the
|
2019-06-10 06:48:41 +00:00
|
|
|
// snapshot metadata from store
|
2019-01-17 07:51:06 +00:00
|
|
|
func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequest) (*csi.DeleteSnapshotResponse, error) {
|
2018-08-08 05:42:17 +00:00
|
|
|
if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT); err != nil {
|
2019-12-16 09:29:04 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "invalid delete snapshot req: %v"), protosanitizer.StripSecrets(req))
|
2018-08-08 05:42:17 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2019-06-25 19:29:17 +00:00
|
|
|
cr, err := util.NewUserCredentials(req.GetSecrets())
|
2019-06-01 21:26:42 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
2019-06-25 19:29:17 +00:00
|
|
|
defer cr.DeleteCredentials()
|
2019-06-01 21:26:42 +00:00
|
|
|
|
2018-08-08 05:42:17 +00:00
|
|
|
snapshotID := req.GetSnapshotId()
|
2019-06-10 06:48:41 +00:00
|
|
|
if snapshotID == "" {
|
2019-05-13 04:47:17 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, "snapshot ID cannot be empty")
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
2019-01-28 13:59:16 +00:00
|
|
|
|
2019-09-12 04:53:37 +00:00
|
|
|
if acquired := cs.SnapshotLocks.TryAcquire(snapshotID); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.SnapshotOperationAlreadyExistsFmt), snapshotID)
|
2019-09-12 04:53:37 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, snapshotID)
|
|
|
|
}
|
|
|
|
defer cs.SnapshotLocks.Release(snapshotID)
|
|
|
|
|
2018-08-08 05:42:17 +00:00
|
|
|
rbdSnap := &rbdSnapshot{}
|
2019-08-22 16:57:23 +00:00
|
|
|
if err = genSnapFromSnapID(ctx, rbdSnap, snapshotID, cr); err != nil {
|
2020-01-31 08:49:11 +00:00
|
|
|
// if error is ErrPoolNotFound, the pool is already deleted we dont
|
|
|
|
// need to worry about deleting snapshot or omap data, return success
|
|
|
|
if _, ok := err.(util.ErrPoolNotFound); ok {
|
|
|
|
klog.Warningf(util.Log(ctx, "failed to get backend snapshot for %s: %v"), snapshotID, err)
|
|
|
|
return &csi.DeleteSnapshotResponse{}, nil
|
|
|
|
}
|
|
|
|
|
2019-06-28 01:10:32 +00:00
|
|
|
// if error is ErrKeyNotFound, then a previous attempt at deletion was complete
|
|
|
|
// or partially complete (snap and snapOMap are garbage collected already), hence return
|
|
|
|
// success as deletion is complete
|
|
|
|
if _, ok := err.(util.ErrKeyNotFound); ok {
|
|
|
|
return &csi.DeleteSnapshotResponse{}, nil
|
|
|
|
}
|
|
|
|
|
Move locks to more granular locking than CPU count based
As detailed in issue #279, current lock scheme has hash
buckets that are count of CPUs. This causes a lot of contention
when parallel requests are made to the CSI plugin. To reduce
lock contention, this commit introduces granular locks per
identifier.
The commit also changes the timeout for gRPC requests to Create
and Delete volumes, as the current timeout is 10s (kubernetes
documentation says 15s but code defaults are 10s). A virtual
setup takes about 12-15s to complete a request at times, that leads
to unwanted retries of the same request, hence the increased
timeout to enable operation completion with minimal retries.
Tests to create PVCs before and after these changes look like so,
Before:
Default master code + sidecar provisioner --timeout option set
to 30 seconds
20 PVCs
Creation: 3 runs, 396/391/400 seconds
Deletion: 3 runs, 218/271/118 seconds
- Once was stalled for more than 8 minutes and cancelled the run
After:
Current commit + sidecar provisioner --timeout option set to 30 sec
20 PVCs
Creation: 3 runs, 42/59/65 seconds
Deletion: 3 runs, 32/32/31 seconds
Fixes: #279
Signed-off-by: ShyamsundarR <srangana@redhat.com>
2019-06-22 16:43:28 +00:00
|
|
|
// All errors other than ErrSnapNotFound should return an error back to the caller
|
2019-04-22 21:35:39 +00:00
|
|
|
if _, ok := err.(ErrSnapNotFound); !ok {
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
2019-02-25 17:09:21 +00:00
|
|
|
}
|
Move locks to more granular locking than CPU count based
As detailed in issue #279, current lock scheme has hash
buckets that are count of CPUs. This causes a lot of contention
when parallel requests are made to the CSI plugin. To reduce
lock contention, this commit introduces granular locks per
identifier.
The commit also changes the timeout for gRPC requests to Create
and Delete volumes, as the current timeout is 10s (kubernetes
documentation says 15s but code defaults are 10s). A virtual
setup takes about 12-15s to complete a request at times, that leads
to unwanted retries of the same request, hence the increased
timeout to enable operation completion with minimal retries.
Tests to create PVCs before and after these changes look like so,
Before:
Default master code + sidecar provisioner --timeout option set
to 30 seconds
20 PVCs
Creation: 3 runs, 396/391/400 seconds
Deletion: 3 runs, 218/271/118 seconds
- Once was stalled for more than 8 minutes and cancelled the run
After:
Current commit + sidecar provisioner --timeout option set to 30 sec
20 PVCs
Creation: 3 runs, 42/59/65 seconds
Deletion: 3 runs, 32/32/31 seconds
Fixes: #279
Signed-off-by: ShyamsundarR <srangana@redhat.com>
2019-06-22 16:43:28 +00:00
|
|
|
|
|
|
|
// Consider missing snap as already deleted, and proceed to remove the omap values,
|
2019-09-12 04:53:37 +00:00
|
|
|
// safeguarding against parallel create or delete requests against the
|
|
|
|
// same name.
|
|
|
|
if acquired := cs.SnapshotLocks.TryAcquire(rbdSnap.RequestName); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.SnapshotOperationAlreadyExistsFmt), rbdSnap.RequestName)
|
2019-09-12 04:53:37 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, rbdSnap.RequestName)
|
|
|
|
}
|
|
|
|
defer cs.SnapshotLocks.Release(rbdSnap.RequestName)
|
Move locks to more granular locking than CPU count based
As detailed in issue #279, current lock scheme has hash
buckets that are count of CPUs. This causes a lot of contention
when parallel requests are made to the CSI plugin. To reduce
lock contention, this commit introduces granular locks per
identifier.
The commit also changes the timeout for gRPC requests to Create
and Delete volumes, as the current timeout is 10s (kubernetes
documentation says 15s but code defaults are 10s). A virtual
setup takes about 12-15s to complete a request at times, that leads
to unwanted retries of the same request, hence the increased
timeout to enable operation completion with minimal retries.
Tests to create PVCs before and after these changes look like so,
Before:
Default master code + sidecar provisioner --timeout option set
to 30 seconds
20 PVCs
Creation: 3 runs, 396/391/400 seconds
Deletion: 3 runs, 218/271/118 seconds
- Once was stalled for more than 8 minutes and cancelled the run
After:
Current commit + sidecar provisioner --timeout option set to 30 sec
20 PVCs
Creation: 3 runs, 42/59/65 seconds
Deletion: 3 runs, 32/32/31 seconds
Fixes: #279
Signed-off-by: ShyamsundarR <srangana@redhat.com>
2019-06-22 16:43:28 +00:00
|
|
|
|
2019-08-22 17:19:06 +00:00
|
|
|
if err = undoSnapReservation(ctx, rbdSnap, cr); err != nil {
|
2019-04-22 21:35:39 +00:00
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
return &csi.DeleteSnapshotResponse{}, nil
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
|
|
|
|
2019-09-12 04:53:37 +00:00
|
|
|
// safeguard against parallel create or delete requests against the same
|
|
|
|
// name
|
|
|
|
if acquired := cs.SnapshotLocks.TryAcquire(rbdSnap.RequestName); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.SnapshotOperationAlreadyExistsFmt), rbdSnap.RequestName)
|
2019-09-12 04:53:37 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, rbdSnap.RequestName)
|
|
|
|
}
|
|
|
|
defer cs.SnapshotLocks.Release(rbdSnap.RequestName)
|
2019-04-22 21:35:39 +00:00
|
|
|
|
2018-08-08 05:42:17 +00:00
|
|
|
// Unprotect snapshot
|
2019-08-22 16:57:23 +00:00
|
|
|
err = unprotectSnapshot(ctx, rbdSnap, cr)
|
2018-08-08 05:42:17 +00:00
|
|
|
if err != nil {
|
2019-04-22 21:35:39 +00:00
|
|
|
return nil, status.Errorf(codes.FailedPrecondition,
|
|
|
|
"failed to unprotect snapshot: %s/%s with error: %v",
|
|
|
|
rbdSnap.Pool, rbdSnap.RbdSnapName, err)
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Deleting snapshot
|
2019-08-22 16:57:23 +00:00
|
|
|
klog.V(4).Infof(util.Log(ctx, "deleting Snaphot %s"), rbdSnap.RbdSnapName)
|
|
|
|
if err := deleteSnapshot(ctx, rbdSnap, cr); err != nil {
|
2019-04-22 21:35:39 +00:00
|
|
|
return nil, status.Errorf(codes.FailedPrecondition,
|
|
|
|
"failed to delete snapshot: %s/%s with error: %v",
|
|
|
|
rbdSnap.Pool, rbdSnap.RbdSnapName, err)
|
2018-08-08 05:42:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return &csi.DeleteSnapshotResponse{}, nil
|
|
|
|
}
|
2019-11-27 12:14:31 +00:00
|
|
|
|
|
|
|
// ControllerExpandVolume expand RBD Volumes on demand based on resizer request
|
|
|
|
func (cs *ControllerServer) ControllerExpandVolume(ctx context.Context, req *csi.ControllerExpandVolumeRequest) (*csi.ControllerExpandVolumeResponse, error) {
|
|
|
|
if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_EXPAND_VOLUME); err != nil {
|
2019-12-16 09:29:04 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "invalid expand volume req: %v"), protosanitizer.StripSecrets(req))
|
2019-11-27 12:14:31 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
volID := req.GetVolumeId()
|
|
|
|
if volID == "" {
|
2019-12-13 10:29:33 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, "volume ID cannot be empty")
|
2019-11-27 12:14:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
capRange := req.GetCapacityRange()
|
|
|
|
if capRange == nil {
|
2019-12-13 10:29:33 +00:00
|
|
|
return nil, status.Error(codes.InvalidArgument, "capacityRange cannot be empty")
|
|
|
|
}
|
|
|
|
|
|
|
|
// lock out parallel requests against the same volume ID
|
|
|
|
if acquired := cs.VolumeLocks.TryAcquire(volID); !acquired {
|
2020-03-23 02:15:35 +00:00
|
|
|
klog.Errorf(util.Log(ctx, util.VolumeOperationAlreadyExistsFmt), volID)
|
2019-12-13 10:29:33 +00:00
|
|
|
return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, volID)
|
2019-11-27 12:14:31 +00:00
|
|
|
}
|
2019-12-13 10:29:33 +00:00
|
|
|
defer cs.VolumeLocks.Release(volID)
|
2019-11-27 12:14:31 +00:00
|
|
|
|
|
|
|
cr, err := util.NewUserCredentials(req.GetSecrets())
|
|
|
|
if err != nil {
|
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
defer cr.DeleteCredentials()
|
|
|
|
|
2020-03-17 13:39:35 +00:00
|
|
|
rbdVol, err := genVolFromVolID(ctx, volID, cr, req.GetSecrets())
|
2019-11-27 12:14:31 +00:00
|
|
|
if err != nil {
|
2020-05-11 08:48:52 +00:00
|
|
|
switch err.(type) {
|
|
|
|
case ErrImageNotFound:
|
|
|
|
err = status.Errorf(codes.NotFound, "volume ID %s not found", volID)
|
|
|
|
case util.ErrPoolNotFound:
|
2020-01-31 08:49:11 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to get backend volume for %s: %v"), volID, err)
|
2020-05-11 08:48:52 +00:00
|
|
|
err = status.Errorf(codes.NotFound, err.Error())
|
|
|
|
default:
|
|
|
|
err = status.Errorf(codes.Internal, err.Error())
|
2020-01-31 08:49:11 +00:00
|
|
|
}
|
2020-05-11 08:48:52 +00:00
|
|
|
return nil, err
|
2019-11-27 12:14:31 +00:00
|
|
|
}
|
2020-03-17 13:39:35 +00:00
|
|
|
defer rbdVol.Destroy()
|
2019-11-27 12:14:31 +00:00
|
|
|
|
2020-01-29 11:44:45 +00:00
|
|
|
if rbdVol.Encrypted {
|
|
|
|
return nil, status.Errorf(codes.InvalidArgument, "encrypted volumes do not support resize (%s/%s)",
|
|
|
|
rbdVol.Pool, rbdVol.RbdImageName)
|
|
|
|
}
|
|
|
|
|
2019-11-27 12:14:31 +00:00
|
|
|
// always round up the request size in bytes to the nearest MiB/GiB
|
|
|
|
volSize := util.RoundOffBytes(req.GetCapacityRange().GetRequiredBytes())
|
|
|
|
|
|
|
|
// resize volume if required
|
2019-12-13 10:29:33 +00:00
|
|
|
nodeExpansion := false
|
2019-11-27 12:14:31 +00:00
|
|
|
if rbdVol.VolSize < volSize {
|
2019-12-13 10:29:33 +00:00
|
|
|
klog.V(4).Infof(util.Log(ctx, "rbd volume %s/%s size is %v,resizing to %v"), rbdVol.Pool, rbdVol.RbdImageName, rbdVol.VolSize, volSize)
|
|
|
|
rbdVol.VolSize = volSize
|
|
|
|
nodeExpansion = true
|
2020-02-26 09:35:18 +00:00
|
|
|
err = resizeRBDImage(rbdVol, cr)
|
2019-11-27 12:14:31 +00:00
|
|
|
if err != nil {
|
2019-12-13 10:29:33 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed to resize rbd image: %s/%s with error: %v"), rbdVol.Pool, rbdVol.RbdImageName, err)
|
2019-11-27 12:14:31 +00:00
|
|
|
return nil, status.Error(codes.Internal, err.Error())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return &csi.ControllerExpandVolumeResponse{
|
2019-12-13 10:29:33 +00:00
|
|
|
CapacityBytes: rbdVol.VolSize,
|
2019-11-27 12:14:31 +00:00
|
|
|
NodeExpansionRequired: nodeExpansion,
|
|
|
|
}, nil
|
|
|
|
}
|