diff --git a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml index 55777e785..4b4c48e18 100644 --- a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml +++ b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml @@ -117,6 +117,7 @@ spec: - "--rbdhardmaxclonedepth={{ .Values.provisioner.hardMaxCloneDepth }}" - "--rbdsoftmaxclonedepth={{ .Values.provisioner.softMaxCloneDepth }}" - "--maxsnapshotsonimage={{ .Values.provisioner.maxSnapshotsOnImage }}" + - "--minsnapshotsonimage={{ .Values.provisioner.minSnapshotsOnImage }}" {{- if .Values.provisioner.skipForceFlatten }} - "--skipforceflatten={{ .Values.provisioner.skipForceFlatten }}" {{- end }} diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index 802077753..8fabebf04 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -115,6 +115,8 @@ provisioner: softMaxCloneDepth: 4 # Maximum number of snapshots allowed on rbd image without flattening maxSnapshotsOnImage: 450 + # Minimum number of snapshots allowed on rbd image to trigger flattening + minSnapshotsOnImage: 250 # skip image flattening if kernel support mapping of rbd images # which has the deep-flatten feature # skipForceFlatten: false diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go index eaf386c9b..bf2496a61 100644 --- a/cmd/cephcsi.go +++ b/cmd/cephcsi.go @@ -78,6 +78,7 @@ func init() { flag.UintVar(&conf.RbdHardMaxCloneDepth, "rbdhardmaxclonedepth", 8, "Hard limit for maximum number of nested volume clones that are taken before a flatten occurs") flag.UintVar(&conf.RbdSoftMaxCloneDepth, "rbdsoftmaxclonedepth", 4, "Soft limit for maximum number of nested volume clones that are taken before a flatten occurs") flag.UintVar(&conf.MaxSnapshotsOnImage, "maxsnapshotsonimage", 450, "Maximum number of snapshots allowed on rbd image without flattening") + flag.UintVar(&conf.MinSnapshotsOnImage, "minsnapshotsonimage", 250, "Minimum 
number of snapshots required on rbd image to start flattening") flag.BoolVar(&conf.SkipForceFlatten, "skipforceflatten", false, "skip image flattening if kernel support mapping of rbd images which has the deep-flatten feature") @@ -207,4 +208,8 @@ func validateMaxSnaphostFlag(conf *util.Config) { if conf.MaxSnapshotsOnImage == 0 || conf.MaxSnapshotsOnImage > 500 { klog.Fatalln("maxsnapshotsonimage flag value should be between 1 and 500") } + + if conf.MinSnapshotsOnImage > conf.MaxSnapshotsOnImage { + klog.Fatalln("minsnapshotsonimage flag value should be less than maxsnapshotsonimage") + } } diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index 9e994afc1..50eddf00e 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -342,10 +342,12 @@ func flattenParentImage(ctx context.Context, rbdVol *rbdVolume, cr *util.Credent return nil } -// check snapshots on the rbd image, as we have limit from krbd that -// an image cannot have more than 510 snapshot at a given point of time. -// If the snapshots are more than the `maxSnapshotsOnImage` Add a task to -// flatten all the temporary cloned images. +// check snapshots on the rbd image, as we have limit from krbd that an image +// cannot have more than 510 snapshot at a given point of time. If the +// snapshots are more than the `maxSnapshotsOnImage` Add a task to flatten all +// the temporary cloned images and return ABORT error message. If the snapshots +// are more than the `minSnapshotsOnImage` Add a task to flatten all the +// temporary cloned images. 
func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *util.Credentials) error { snaps, err := rbdVol.listSnapshots(ctx, cr) if err != nil { @@ -356,12 +358,35 @@ func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *ut } if len(snaps) > int(maxSnapshotsOnImage) { - err = flattenClonedRbdImages(ctx, snaps, rbdVol.Pool, rbdVol.Monitors, cr) + util.DebugLog(ctx, "snapshots count %d on image: %s reached configured hard limit %d", len(snaps), rbdVol, maxSnapshotsOnImage) + err = flattenClonedRbdImages( + ctx, + snaps, + rbdVol.Pool, + rbdVol.Monitors, + cr) if err != nil { return status.Error(codes.Internal, err.Error()) } return status.Errorf(codes.ResourceExhausted, "rbd image %s has %d snapshots", rbdVol, len(snaps)) } + + if len(snaps) > int(minSnapshotsOnImageToStartFlatten) { + util.DebugLog(ctx, "snapshots count %d on image: %s reached configured soft limit %d", len(snaps), rbdVol, minSnapshotsOnImageToStartFlatten) + // If we start flattening all the snapshots at one shot the volume + // creation time will be affected, so we will flatten only the extra + // snapshots. + snaps = snaps[minSnapshotsOnImageToStartFlatten-1:] + err = flattenClonedRbdImages( + ctx, + snaps, + rbdVol.Pool, + rbdVol.Monitors, + cr) + if err != nil { + return status.Error(codes.Internal, err.Error()) + } + } return nil } diff --git a/internal/rbd/driver.go b/internal/rbd/driver.go index ff2d16f78..c92ac5cfe 100644 --- a/internal/rbd/driver.go +++ b/internal/rbd/driver.go @@ -53,9 +53,10 @@ var ( rbdHardMaxCloneDepth uint // rbdSoftMaxCloneDepth is the soft limit for maximum number of nested volume clones that are taken before a flatten occurs - rbdSoftMaxCloneDepth uint - maxSnapshotsOnImage uint - skipForceFlatten bool + rbdSoftMaxCloneDepth uint + maxSnapshotsOnImage uint + minSnapshotsOnImageToStartFlatten uint + skipForceFlatten bool ) // NewDriver returns new rbd driver. 
@@ -111,6 +112,7 @@ func (r *Driver) Run(conf *util.Config) { rbdSoftMaxCloneDepth = conf.RbdSoftMaxCloneDepth skipForceFlatten = conf.SkipForceFlatten maxSnapshotsOnImage = conf.MaxSnapshotsOnImage + minSnapshotsOnImageToStartFlatten = conf.MinSnapshotsOnImage // Create instances of the volume and snapshot journal volJournal = journal.NewCSIVolumeJournal(CSIInstanceID) snapJournal = journal.NewCSISnapshotJournal(CSIInstanceID) diff --git a/internal/util/util.go b/internal/util/util.go index 99c455107..24b8d6ed6 100644 --- a/internal/util/util.go +++ b/internal/util/util.go @@ -107,6 +107,11 @@ type Config struct { // on rbd image without flattening, once the limit is reached cephcsi will // start flattening the older rbd images to allow more snapshots MaxSnapshotsOnImage uint + + // MinSnapshotsOnImage represents the soft limit for the number of + // snapshots allowed on rbd image without flattening; once the soft limit is + // reached cephcsi will start flattening the older rbd images. + MinSnapshotsOnImage uint } // ValidateDriverName validates the driver name. 
diff --git a/scripts/install-helm.sh b/scripts/install-helm.sh index 298a2e3f8..a306d674b 100755 --- a/scripts/install-helm.sh +++ b/scripts/install-helm.sh @@ -125,7 +125,7 @@ install_cephcsi_helm_charts() { # deleting configmap as a workaround to avoid configmap already present # issue when installing ceph-csi-rbd kubectl delete cm ceph-csi-config --namespace ${NAMESPACE} - "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.podSecurityPolicy.enabled=true --set nodeplugin.podSecurityPolicy.enabled=true ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 + "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.podSecurityPolicy.enabled=true --set nodeplugin.podSecurityPolicy.enabled=true ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 --set provisioner.minSnapshotsOnImage=2 check_deployment_status app=ceph-csi-rbd ${NAMESPACE} check_daemonset_status app=ceph-csi-rbd ${NAMESPACE}