From 8d3a44d0c45a0f675d92b065448430daf742cb6f Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Tue, 17 Nov 2020 09:04:29 +0530 Subject: [PATCH] rbd: add minsnapshotsonimage flag An rbd image can have a maximum number of snapshots defined by maxsnapshotsonimage. Once the limit is reached, cephcsi will start flattening the older snapshots and return the ABORT error message. Requests that come after this have to wait till all the images are flattened (this will increase the PVC creation time). Instead of waiting till the maximum number of snapshots on an RBD image is reached, we can have a soft limit; once the soft limit is reached, cephcsi will start the flattening task to break the chain. With this, PVC creation time will only be affected when the hard limit (maxsnapshotsonimage) is reached. Signed-off-by: Madhu Rajanna --- .../templates/provisioner-deployment.yaml | 1 + charts/ceph-csi-rbd/values.yaml | 2 ++ cmd/cephcsi.go | 5 ++++ internal/rbd/controllerserver.go | 29 ++++++++++++++++--- internal/rbd/driver.go | 8 +++-- internal/util/util.go | 5 ++++ scripts/install-helm.sh | 2 +- 7 files changed, 44 insertions(+), 8 deletions(-) diff --git a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml index 7662e2e64..0aa10530a 100644 --- a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml +++ b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml @@ -133,6 +133,7 @@ spec: - "--rbdhardmaxclonedepth={{ .Values.provisioner.hardMaxCloneDepth }}" - "--rbdsoftmaxclonedepth={{ .Values.provisioner.softMaxCloneDepth }}" - "--maxsnapshotsonimage={{ .Values.provisioner.maxSnapshotsOnImage }}" + - "--minsnapshotsonimage={{ .Values.provisioner.minSnapshotsOnImage }}" {{- if .Values.provisioner.skipForceFlatten }} - "--skipforceflatten={{ .Values.provisioner.skipForceFlatten }}" {{- end }} diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index 4cc45ac1f..a3bd2589b 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ 
b/charts/ceph-csi-rbd/values.yaml @@ -120,6 +120,8 @@ provisioner: softMaxCloneDepth: 4 # Maximum number of snapshots allowed on rbd image without flattening maxSnapshotsOnImage: 450 + # Minimum number of snapshots allowed on rbd image to trigger flattening + minSnapshotsOnImage: 250 # skip image flattening if kernel support mapping of rbd images # which has the deep-flatten feature # skipForceFlatten: false diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go index 97c299e63..fd4769e0e 100644 --- a/cmd/cephcsi.go +++ b/cmd/cephcsi.go @@ -78,6 +78,7 @@ func init() { flag.UintVar(&conf.RbdHardMaxCloneDepth, "rbdhardmaxclonedepth", 8, "Hard limit for maximum number of nested volume clones that are taken before a flatten occurs") flag.UintVar(&conf.RbdSoftMaxCloneDepth, "rbdsoftmaxclonedepth", 4, "Soft limit for maximum number of nested volume clones that are taken before a flatten occurs") flag.UintVar(&conf.MaxSnapshotsOnImage, "maxsnapshotsonimage", 450, "Maximum number of snapshots allowed on rbd image without flattening") + flag.UintVar(&conf.MinSnapshotsOnImage, "minsnapshotsonimage", 250, "Minimum number of snapshots required on rbd image to start flattening") flag.BoolVar(&conf.SkipForceFlatten, "skipforceflatten", false, "skip image flattening if kernel support mapping of rbd images which has the deep-flatten feature") @@ -204,6 +205,10 @@ func validateMaxSnaphostFlag(conf *util.Config) { if conf.MaxSnapshotsOnImage == 0 || conf.MaxSnapshotsOnImage > 500 { logAndExit("maxsnapshotsonimage flag value should be between 1 and 500") } + + if conf.MinSnapshotsOnImage > conf.MaxSnapshotsOnImage { + logAndExit("minsnapshotsonimage flag value should be less than maxsnapshotsonimage") + } } func logAndExit(msg string) { diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index e496b64d7..5bd864e46 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -342,10 +342,12 @@ func flattenParentImage(ctx context.Context, 
rbdVol *rbdVolume, cr *util.Credent return nil } -// check snapshots on the rbd image, as we have limit from krbd that -// an image cannot have more than 510 snapshot at a given point of time. -// If the snapshots are more than the `maxSnapshotsOnImage` Add a task to -// flatten all the temporary cloned images. +// check snapshots on the rbd image, as we have limit from krbd that an image +// cannot have more than 510 snapshots at a given point of time. If the +// snapshots are more than the `maxSnapshotsOnImage`, add a task to flatten all +// the temporary cloned images and return ABORT error message. If the snapshots +// are more than the `minSnapshotsOnImageToStartFlatten`, add a task to flatten +// only the extra temporary cloned images. func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *util.Credentials) error { snaps, err := rbdVol.listSnapshots() if err != nil { @@ -356,6 +358,7 @@ func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *ut } if len(snaps) > int(maxSnapshotsOnImage) { + util.DebugLog(ctx, "snapshots count %d on image: %s reached configured hard limit %d", len(snaps), rbdVol, maxSnapshotsOnImage) err = flattenClonedRbdImages( ctx, snaps, @@ -368,6 +371,24 @@ func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *ut } return status.Errorf(codes.ResourceExhausted, "rbd image %s has %d snapshots", rbdVol, len(snaps)) } + + if len(snaps) > int(minSnapshotsOnImageToStartFlatten) { + util.DebugLog(ctx, "snapshots count %d on image: %s reached configured soft limit %d", len(snaps), rbdVol, minSnapshotsOnImageToStartFlatten) + // If we start flattening all the snapshots at one shot the volume + // creation time will be affected, so we will flatten only the extra + // snapshots. 
+ snaps = snaps[minSnapshotsOnImageToStartFlatten-1:] + err = flattenClonedRbdImages( + ctx, + snaps, + rbdVol.Pool, + rbdVol.Monitors, + rbdVol.RbdImageName, + cr) + if err != nil { + return status.Error(codes.Internal, err.Error()) + } + } return nil } diff --git a/internal/rbd/driver.go b/internal/rbd/driver.go index ff2d16f78..c92ac5cfe 100644 --- a/internal/rbd/driver.go +++ b/internal/rbd/driver.go @@ -53,9 +53,10 @@ var ( rbdHardMaxCloneDepth uint // rbdSoftMaxCloneDepth is the soft limit for maximum number of nested volume clones that are taken before a flatten occurs - rbdSoftMaxCloneDepth uint - maxSnapshotsOnImage uint - skipForceFlatten bool + rbdSoftMaxCloneDepth uint + maxSnapshotsOnImage uint + minSnapshotsOnImageToStartFlatten uint + skipForceFlatten bool ) // NewDriver returns new rbd driver. @@ -111,6 +112,7 @@ func (r *Driver) Run(conf *util.Config) { rbdSoftMaxCloneDepth = conf.RbdSoftMaxCloneDepth skipForceFlatten = conf.SkipForceFlatten maxSnapshotsOnImage = conf.MaxSnapshotsOnImage + minSnapshotsOnImageToStartFlatten = conf.MinSnapshotsOnImage // Create instances of the volume and snapshot journal volJournal = journal.NewCSIVolumeJournal(CSIInstanceID) snapJournal = journal.NewCSISnapshotJournal(CSIInstanceID) diff --git a/internal/util/util.go b/internal/util/util.go index 99c455107..24b8d6ed6 100644 --- a/internal/util/util.go +++ b/internal/util/util.go @@ -107,6 +107,11 @@ type Config struct { // on rbd image without flattening, once the limit is reached cephcsi will // start flattening the older rbd images to allow more snapshots MaxSnapshotsOnImage uint + + // MinSnapshotsOnImage represents the soft limit for maximum number of + // snapshots allowed on rbd image without flattening, once the soft limit is + // reached cephcsi will start flattening the older rbd images. + MinSnapshotsOnImage uint } // ValidateDriverName validates the driver name. 
diff --git a/scripts/install-helm.sh b/scripts/install-helm.sh index b34c36e48..d96e6167a 100755 --- a/scripts/install-helm.sh +++ b/scripts/install-helm.sh @@ -125,7 +125,7 @@ install_cephcsi_helm_charts() { # deleting configmap as a workaround to avoid configmap already present # issue when installing ceph-csi-rbd kubectl delete cm ceph-csi-config --namespace ${NAMESPACE} - "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.podSecurityPolicy.enabled=true --set nodeplugin.podSecurityPolicy.enabled=true --set provisioner.replicaCount=1 ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 + "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.podSecurityPolicy.enabled=true --set nodeplugin.podSecurityPolicy.enabled=true --set provisioner.replicaCount=1 ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 --set provisioner.minSnapshotsOnImage=2 check_deployment_status app=ceph-csi-rbd ${NAMESPACE} check_daemonset_status app=ceph-csi-rbd ${NAMESPACE}