From 4be9c0159fc355d37d310306ef1f91ff6a4c39b2 Mon Sep 17 00:00:00 2001
From: Madhu Rajanna
Date: Tue, 17 Nov 2020 09:04:29 +0530
Subject: [PATCH] rbd: add minsnapshotsonimage flag

An rbd image can have a maximum number of snapshots, defined by the
maxsnapshotsonimage flag. Once that limit is reached, cephcsi starts
flattening the older snapshots and returns the ABORT error message;
requests that come in after this have to wait until all the images are
flattened, which increases the PVC creation time.

Instead of waiting until the image hits the maximum number of
snapshots, we can have a soft limit: once the soft limit is reached,
cephcsi starts a flattening task to break the chain. With this, PVC
creation time is only affected when the hard limit
(maxsnapshotsonimage) is reached.

Signed-off-by: Madhu Rajanna
(cherry picked from commit 8d3a44d0c45a0f675d92b065448430daf742cb6f)
---
 .../templates/provisioner-deployment.yaml |  1 +
 charts/ceph-csi-rbd/values.yaml           |  2 ++
 cmd/cephcsi.go                            |  5 +++
 internal/rbd/controllerserver.go          | 35 ++++++++++++++++---
 internal/rbd/driver.go                    |  8 +++--
 internal/util/util.go                     |  5 +++
 scripts/install-helm.sh                   |  2 +-
 7 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml
index 55777e785..4b4c48e18 100644
--- a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml
+++ b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml
@@ -117,6 +117,7 @@ spec:
             - "--rbdhardmaxclonedepth={{ .Values.provisioner.hardMaxCloneDepth }}"
             - "--rbdsoftmaxclonedepth={{ .Values.provisioner.softMaxCloneDepth }}"
             - "--maxsnapshotsonimage={{ .Values.provisioner.maxSnapshotsOnImage }}"
+            - "--minsnapshotsonimage={{ .Values.provisioner.minSnapshotsOnImage }}"
 {{- if .Values.provisioner.skipForceFlatten }}
             - "--skipforceflatten={{ .Values.provisioner.skipForceFlatten }}"
 {{- end }}
diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml
index 802077753..8fabebf04 100644
--- a/charts/ceph-csi-rbd/values.yaml
+++ b/charts/ceph-csi-rbd/values.yaml
@@ -115,6 +115,8 @@ provisioner:
   softMaxCloneDepth: 4
   # Maximum number of snapshots allowed on rbd image without flattening
   maxSnapshotsOnImage: 450
+  # Minimum number of snapshots allowed on rbd image to trigger flattening
+  minSnapshotsOnImage: 250
   # skip image flattening if kernel support mapping of rbd images
   # which has the deep-flatten feature
   # skipForceFlatten: false
diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go
index eaf386c9b..bf2496a61 100644
--- a/cmd/cephcsi.go
+++ b/cmd/cephcsi.go
@@ -78,6 +78,7 @@ func init() {
     flag.UintVar(&conf.RbdHardMaxCloneDepth, "rbdhardmaxclonedepth", 8, "Hard limit for maximum number of nested volume clones that are taken before a flatten occurs")
     flag.UintVar(&conf.RbdSoftMaxCloneDepth, "rbdsoftmaxclonedepth", 4, "Soft limit for maximum number of nested volume clones that are taken before a flatten occurs")
     flag.UintVar(&conf.MaxSnapshotsOnImage, "maxsnapshotsonimage", 450, "Maximum number of snapshots allowed on rbd image without flattening")
+    flag.UintVar(&conf.MinSnapshotsOnImage, "minsnapshotsonimage", 250, "Minimum number of snapshots required on rbd image to start flattening")
     flag.BoolVar(&conf.SkipForceFlatten, "skipforceflatten", false,
         "skip image flattening if kernel support mapping of rbd images which has the deep-flatten feature")

@@ -207,4 +208,8 @@ func validateMaxSnaphostFlag(conf *util.Config) {
     if conf.MaxSnapshotsOnImage == 0 || conf.MaxSnapshotsOnImage > 500 {
        klog.Fatalln("maxsnapshotsonimage flag value should be between 1 and 500")
     }
+
+    if conf.MinSnapshotsOnImage > conf.MaxSnapshotsOnImage {
+        klog.Fatalln("minsnapshotsonimage flag value should be less than maxsnapshotsonimage")
+    }
 }
diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go
index 9e994afc1..50eddf00e 100644
--- a/internal/rbd/controllerserver.go
+++ b/internal/rbd/controllerserver.go
@@ -342,10 +342,12 @@ func flattenParentImage(ctx context.Context, rbdVol *rbdVolume, cr *util.Credent
     return nil
 }

-// check snapshots on the rbd image, as we have limit from krbd that
-// an image cannot have more than 510 snapshot at a given point of time.
-// If the snapshots are more than the `maxSnapshotsOnImage` Add a task to
-// flatten all the temporary cloned images.
+// check snapshots on the rbd image, as we have a limit from krbd that an
+// image cannot have more than 510 snapshots at a given point of time. If the
+// snapshot count is above `maxSnapshotsOnImage`, add a task to flatten all
+// the temporary cloned images and return an ABORT error message. If the
+// snapshot count is above `minSnapshotsOnImage`, add a task to flatten all
+// the temporary cloned images.
 func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *util.Credentials) error {
     snaps, err := rbdVol.listSnapshots(ctx, cr)
     if err != nil {
@@ -356,12 +358,35 @@ func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *ut
     }

     if len(snaps) > int(maxSnapshotsOnImage) {
-        err = flattenClonedRbdImages(ctx, snaps, rbdVol.Pool, rbdVol.Monitors, cr)
+        util.DebugLog(ctx, "snapshots count %d on image: %s reached configured hard limit %d", len(snaps), rbdVol, maxSnapshotsOnImage)
+        err = flattenClonedRbdImages(
+            ctx,
+            snaps,
+            rbdVol.Pool,
+            rbdVol.Monitors,
+            cr)
         if err != nil {
             return status.Error(codes.Internal, err.Error())
         }
         return status.Errorf(codes.ResourceExhausted, "rbd image %s has %d snapshots", rbdVol, len(snaps))
     }
+
+    if len(snaps) > int(minSnapshotsOnImageToStartFlatten) {
+        util.DebugLog(ctx, "snapshots count %d on image: %s reached configured soft limit %d", len(snaps), rbdVol, minSnapshotsOnImageToStartFlatten)
+        // If we start flattening all the snapshots in one shot, the volume
+        // creation time will be affected, so we flatten only the extra
+        // snapshots.
+        snaps = snaps[minSnapshotsOnImageToStartFlatten-1:]
+        err = flattenClonedRbdImages(
+            ctx,
+            snaps,
+            rbdVol.Pool,
+            rbdVol.Monitors,
+            cr)
+        if err != nil {
+            return status.Error(codes.Internal, err.Error())
+        }
+    }

     return nil
 }
diff --git a/internal/rbd/driver.go b/internal/rbd/driver.go
index ff2d16f78..c92ac5cfe 100644
--- a/internal/rbd/driver.go
+++ b/internal/rbd/driver.go
@@ -53,9 +53,10 @@ var (
     rbdHardMaxCloneDepth uint

     // rbdSoftMaxCloneDepth is the soft limit for maximum number of nested volume clones that are taken before a flatten occurs
-    rbdSoftMaxCloneDepth uint
-    maxSnapshotsOnImage  uint
-    skipForceFlatten     bool
+    rbdSoftMaxCloneDepth              uint
+    maxSnapshotsOnImage               uint
+    minSnapshotsOnImageToStartFlatten uint
+    skipForceFlatten                  bool
 )

 // NewDriver returns new rbd driver.
@@ -111,6 +112,7 @@ func (r *Driver) Run(conf *util.Config) {
     rbdSoftMaxCloneDepth = conf.RbdSoftMaxCloneDepth
     skipForceFlatten = conf.SkipForceFlatten
     maxSnapshotsOnImage = conf.MaxSnapshotsOnImage
+    minSnapshotsOnImageToStartFlatten = conf.MinSnapshotsOnImage
     // Create instances of the volume and snapshot journal
     volJournal = journal.NewCSIVolumeJournal(CSIInstanceID)
     snapJournal = journal.NewCSISnapshotJournal(CSIInstanceID)
diff --git a/internal/util/util.go b/internal/util/util.go
index 99c455107..24b8d6ed6 100644
--- a/internal/util/util.go
+++ b/internal/util/util.go
@@ -107,6 +107,11 @@ type Config struct {
     // on rbd image without flattening, once the limit is reached cephcsi will
     // start flattening the older rbd images to allow more snapshots
     MaxSnapshotsOnImage uint
+
+    // MinSnapshotsOnImage represents the soft limit for maximum number of
+    // snapshots allowed on rbd image without flattening, once the soft limit is
+    // reached cephcsi will start flattening the older rbd images.
+    MinSnapshotsOnImage uint
 }

 // ValidateDriverName validates the driver name.
diff --git a/scripts/install-helm.sh b/scripts/install-helm.sh
index 298a2e3f8..a306d674b 100755
--- a/scripts/install-helm.sh
+++ b/scripts/install-helm.sh
@@ -125,7 +125,7 @@ install_cephcsi_helm_charts() {
     # deleting configmap as a workaround to avoid configmap already present
     # issue when installing ceph-csi-rbd
     kubectl delete cm ceph-csi-config --namespace ${NAMESPACE}
-    "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.podSecurityPolicy.enabled=true --set nodeplugin.podSecurityPolicy.enabled=true ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3
+    "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.podSecurityPolicy.enabled=true --set nodeplugin.podSecurityPolicy.enabled=true ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 --set provisioner.minSnapshotsOnImage=2
     check_deployment_status app=ceph-csi-rbd ${NAMESPACE}
     check_daemonset_status app=ceph-csi-rbd ${NAMESPACE}
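
Note (not part of the patch): the standalone Go sketch below illustrates the soft/hard limit decision that flattenTemporaryClonedImages implements after this change. The names decideFlatten, snapCount, softLimit and hardLimit are invented for illustration and do not exist in ceph-csi; only the decision flow mirrors the patched code.

// decideFlatten mirrors the patched decision flow: above the hard limit,
// flatten everything and abort the request; above the soft limit, flatten
// only the snapshots beyond the soft limit and let the request continue.
package main

import "fmt"

func decideFlatten(snapCount, softLimit, hardLimit int) (flattenFrom int, abort bool) {
	switch {
	case snapCount > hardLimit:
		// hard limit exceeded: flatten all snapshots and abort
		return 0, true
	case snapCount > softLimit:
		// soft limit exceeded: mirrors snaps = snaps[minSnapshotsOnImageToStartFlatten-1:]
		return softLimit - 1, false
	default:
		return -1, false
	}
}

func main() {
	const softLimit, hardLimit = 250, 450 // the new defaults
	for _, n := range []int{100, 300, 500} {
		from, abort := decideFlatten(n, softLimit, hardLimit)
		switch {
		case abort:
			fmt.Printf("%d snapshots: flatten all, return ResourceExhausted (ABORT)\n", n)
		case from >= 0:
			fmt.Printf("%d snapshots: flatten snapshots from index %d, request continues\n", n, from)
		default:
			fmt.Printf("%d snapshots: nothing to flatten\n", n)
		}
	}
}

With the defaults above, only requests arriving while the image is past the hard limit pay the flattening wait; between the soft and hard limits the extra snapshots are queued for flattening in the background and PVC creation proceeds normally.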