rbd: add locking for VolumeGroupSnapshot operations

Add VolumeGroupLocks in the CSI Controller Server so that operations are
protected against concurrent requests for the same VolumeGroupSnapshot.

Signed-off-by: Niels de Vos <ndevos@ibm.com>
This commit is contained in:
Niels de Vos 2024-11-11 12:42:48 +01:00 committed by mergify[bot]
parent f3d40f9e5a
commit d98516e9d8
3 changed files with 38 additions and 4 deletions

View File

@ -53,6 +53,10 @@ type ControllerServer struct {
// A map storing all volumes/snapshots with ongoing operations. // A map storing all volumes/snapshots with ongoing operations.
OperationLocks *util.OperationLock OperationLocks *util.OperationLock
// A map storing all volume groups with ongoing operations so that additional operations
// for that same volume group (as defined by volumegroup ID/volumegroup name) return an Aborted error
VolumeGroupLocks *util.VolumeLocks
// Cluster name // Cluster name
ClusterName string ClusterName string

View File

@ -62,6 +62,7 @@ func NewControllerServer(d *csicommon.CSIDriver) *rbd.ControllerServer {
DefaultControllerServer: csicommon.NewDefaultControllerServer(d), DefaultControllerServer: csicommon.NewDefaultControllerServer(d),
VolumeLocks: util.NewVolumeLocks(), VolumeLocks: util.NewVolumeLocks(),
SnapshotLocks: util.NewVolumeLocks(), SnapshotLocks: util.NewVolumeLocks(),
VolumeGroupLocks: util.NewVolumeLocks(),
OperationLocks: util.NewOperationLock(), OperationLocks: util.NewOperationLock(),
} }
} }

View File

@ -24,6 +24,7 @@ import (
"google.golang.org/grpc/status" "google.golang.org/grpc/status"
"github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/rbd/types"
"github.com/ceph/ceph-csi/internal/util"
"github.com/ceph/ceph-csi/internal/util/log" "github.com/ceph/ceph-csi/internal/util/log"
) )
@ -50,6 +51,14 @@ func (cs *ControllerServer) CreateVolumeGroupSnapshot(
vgsName = req.GetName() vgsName = req.GetName()
) )
// Conflict check: abort if another operation for the same volume group snapshot name is in progress
if acquired := cs.VolumeGroupLocks.TryAcquire(vgsName); !acquired {
log.ErrorLog(ctx, util.SnapshotOperationAlreadyExistsFmt, vgsName)
return nil, status.Errorf(codes.Aborted, util.SnapshotOperationAlreadyExistsFmt, vgsName)
}
defer cs.VolumeGroupLocks.Release(vgsName)
mgr := NewManager(cs.Driver.GetInstanceID(), req.GetParameters(), req.GetSecrets()) mgr := NewManager(cs.Driver.GetInstanceID(), req.GetParameters(), req.GetSecrets())
defer mgr.Destroy(ctx) defer mgr.Destroy(ctx)
@ -166,15 +175,25 @@ func (cs *ControllerServer) DeleteVolumeGroupSnapshot(
// 1. verify that all snapshots in the request are all snapshots in the group // 1. verify that all snapshots in the request are all snapshots in the group
// 2. delete the group // 2. delete the group
groupSnapshotID := req.GetGroupSnapshotId()
// Conflict check: abort if another operation for the same group snapshot ID is in progress
if acquired := cs.VolumeGroupLocks.TryAcquire(groupSnapshotID); !acquired {
log.ErrorLog(ctx, util.SnapshotOperationAlreadyExistsFmt, groupSnapshotID)
return nil, status.Errorf(codes.Aborted, util.SnapshotOperationAlreadyExistsFmt, groupSnapshotID)
}
defer cs.VolumeGroupLocks.Release(groupSnapshotID)
mgr := NewManager(cs.Driver.GetInstanceID(), nil, req.GetSecrets()) mgr := NewManager(cs.Driver.GetInstanceID(), nil, req.GetSecrets())
defer mgr.Destroy(ctx) defer mgr.Destroy(ctx)
groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, req.GetGroupSnapshotId()) groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, groupSnapshotID)
if err != nil { if err != nil {
return nil, status.Errorf( return nil, status.Errorf(
codes.Internal, codes.Internal,
"failed to get volume group snapshot with id %q: %v", "failed to get volume group snapshot with id %q: %v",
req.GetGroupSnapshotId(), err) groupSnapshotID, err)
} }
defer groupSnapshot.Destroy(ctx) defer groupSnapshot.Destroy(ctx)
@ -195,15 +214,25 @@ func (cs *ControllerServer) GetVolumeGroupSnapshot(
ctx context.Context, ctx context.Context,
req *csi.GetVolumeGroupSnapshotRequest, req *csi.GetVolumeGroupSnapshotRequest,
) (*csi.GetVolumeGroupSnapshotResponse, error) { ) (*csi.GetVolumeGroupSnapshotResponse, error) {
groupSnapshotID := req.GetGroupSnapshotId()
// Conflict check: abort if another operation for the same group snapshot ID is in progress
if acquired := cs.VolumeGroupLocks.TryAcquire(groupSnapshotID); !acquired {
log.ErrorLog(ctx, util.SnapshotOperationAlreadyExistsFmt, groupSnapshotID)
return nil, status.Errorf(codes.Aborted, util.SnapshotOperationAlreadyExistsFmt, groupSnapshotID)
}
defer cs.VolumeGroupLocks.Release(groupSnapshotID)
mgr := NewManager(cs.Driver.GetInstanceID(), nil, req.GetSecrets()) mgr := NewManager(cs.Driver.GetInstanceID(), nil, req.GetSecrets())
defer mgr.Destroy(ctx) defer mgr.Destroy(ctx)
groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, req.GetGroupSnapshotId()) groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, groupSnapshotID)
if err != nil { if err != nil {
return nil, status.Errorf( return nil, status.Errorf(
codes.Internal, codes.Internal,
"failed to get volume group snapshot with id %q: %v", "failed to get volume group snapshot with id %q: %v",
req.GetGroupSnapshotId(), err) groupSnapshotID, err)
} }
defer groupSnapshot.Destroy(ctx) defer groupSnapshot.Destroy(ctx)