From 7411773f7367f4ef05f9012f71361bfcd6cf6a70 Mon Sep 17 00:00:00 2001 From: Konstantin Shalygin Date: Thu, 16 Sep 2021 13:38:04 +0300 Subject: [PATCH 1/5] rbd: added RBD features support for krbd Added support for `object-map, fast-diff` Signed-off-by: Konstantin Shalygin --- charts/ceph-csi-rbd/values.yaml | 9 +++++---- docs/deploy-rbd.md | 2 +- examples/rbd/storageclass.yaml | 9 +++++---- internal/rbd/rbd_util.go | 10 +++++++++- internal/rbd/rbd_util_test.go | 32 ++++++++++++++++++++++++++++---- 5 files changed, 48 insertions(+), 14 deletions(-) diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index 740ff12db..e37e17de2 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -284,10 +284,11 @@ storageClass: thickProvision: false # (required) RBD image features, CSI creates image with image-format 2 - # CSI RBD currently supports `layering`, `journaling`, `exclusive-lock` - # features. If `journaling` is enabled, must enable `exclusive-lock` too. - # imageFeatures: layering,journaling,exclusive-lock - imageFeatures: layering + # CSI RBD currently supports `layering`, `journaling`, `exclusive-lock`, + # `object-map`, `fast-diff` features. If `journaling` is enabled, must + # enable `exclusive-lock` too. + # imageFeatures: layering,journaling,exclusive-lock,object-map,fast-diff + imageFeatures: "layering" # (optional) Specifies whether to try other mounters in case if the current # mounter fails to mount the rbd image for any reason. True means fallback diff --git a/docs/deploy-rbd.md b/docs/deploy-rbd.md index 2dd798c1e..6e20b5f98 100644 --- a/docs/deploy-rbd.md +++ b/docs/deploy-rbd.md @@ -55,7 +55,7 @@ make image-cephcsi | `dataPool` | no | Ceph pool used for the data of the RBD images. | | `volumeNamePrefix` | no | Prefix to use for naming RBD images (defaults to `csi-vol-`). | | `snapshotNamePrefix` | no | Prefix to use for naming RBD snapshot images (defaults to `csi-snap-`). | -| `imageFeatures` | yes | RBD image features. CSI RBD currently supports `layering`, `journaling`, `exclusive-lock` features. If `journaling` is enabled, must enable `exclusive-lock` too. See [man pages](http://docs.ceph.com/docs/master/man/8/rbd/#cmdoption-rbd-image-feature) Note that the required support for [object-map and fast-diff were added in 5.3 and journaling does not have KRBD support yet](https://docs.ceph.com/en/latest/rbd/rbd-config-ref/#image-features). deep-flatten is added for cloned images. | +| `imageFeatures` | yes | RBD image features. CSI RBD currently supports `layering`, `journaling`, `exclusive-lock`, `object-map`, `fast-diff` features. If `journaling` is enabled, must enable `exclusive-lock` too. See [man pages](http://docs.ceph.com/docs/master/man/8/rbd/#cmdoption-rbd-image-feature) Note that the required support for [object-map and fast-diff were added in 5.3 and journaling does not have KRBD support yet](https://docs.ceph.com/en/latest/rbd/rbd-config-ref/#image-features). deep-flatten is added for cloned images. | | `tryOtherMounters` | no | Specifies whether to try other mounters in case if the current mounter fails to mount the rbd image for any reason | | `mapOptions` | no | Map options to use when mapping rbd image. See [krbd](https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options) and [nbd](https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options) options. | | `unmapOptions` | no | Unmap options to use when unmapping rbd image. See [krbd](https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options) and [nbd](https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options) options. | diff --git a/examples/rbd/storageclass.yaml b/examples/rbd/storageclass.yaml index 65ce181ab..3df8552b3 100644 --- a/examples/rbd/storageclass.yaml +++ b/examples/rbd/storageclass.yaml @@ -34,10 +34,11 @@ parameters: # thickProvision: "false" # (required) RBD image features, CSI creates image with image-format 2 - # CSI RBD currently supports `layering`, `journaling`, `exclusive-lock` - # features. If `journaling` is enabled, must enable `exclusive-lock` too. - # imageFeatures: layering,journaling,exclusive-lock - imageFeatures: layering + # CSI RBD currently supports `layering`, `journaling`, `exclusive-lock`, + # `object-map`, `fast-diff` features. If `journaling` is enabled, must + # enable `exclusive-lock` too. + # imageFeatures: layering,journaling,exclusive-lock,object-map,fast-diff + imageFeatures: "layering" # (optional) Specifies whether to try other mounters in case if the current # mounter fails to mount the rbd image for any reason. True means fallback diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 1ac823be9..550ece80c 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -206,7 +206,15 @@ var ( needRbdNbd: false, }, librbd.FeatureNameExclusiveLock: { - needRbdNbd: true, + needRbdNbd: false, + }, + librbd.FeatureNameObjectMap: { + needRbdNbd: false, + dependsOn: []string{librbd.FeatureNameExclusiveLock}, + }, + librbd.FeatureNameFastDiff: { + needRbdNbd: false, + dependsOn: []string{librbd.FeatureNameObjectMap}, }, librbd.FeatureNameJournaling: { needRbdNbd: true, diff --git a/internal/rbd/rbd_util_test.go b/internal/rbd/rbd_util_test.go index f00bc038c..331eb693d 100644 --- a/internal/rbd/rbd_util_test.go +++ b/internal/rbd/rbd_util_test.go @@ -88,21 +88,37 @@ func TestValidateImageFeatures(t *testing.T) { false, "", }, + { + "layering,exclusive-lock,object-map,fast-diff", + &rbdVolume{ + Mounter: rbdDefaultMounter, + }, + false, + "", + }, { "layering,journaling", &rbdVolume{ - Mounter: rbdNbdMounter, + Mounter: rbdDefaultMounter, }, true, "feature journaling requires exclusive-lock to be set", }, { - "layering,exclusive-lock,journaling", + "object-map,fast-diff", &rbdVolume{ Mounter: rbdDefaultMounter, }, true, - "feature exclusive-lock requires rbd-nbd for mounter", + "feature object-map requires exclusive-lock to be set", + }, + { + "fast-diff", + &rbdVolume{ + Mounter: rbdDefaultMounter, + }, + true, + "feature fast-diff requires object-map to be set", }, { "layering,exclusive-lock,journaling", @@ -110,7 +126,15 @@ func TestValidateImageFeatures(t *testing.T) { Mounter: rbdDefaultMounter, }, true, - "feature exclusive-lock requires rbd-nbd for mounter", + "feature journaling requires rbd-nbd for mounter", + }, + { + "layering,exclusive-lock,journaling", + &rbdVolume{ + Mounter: rbdDefaultMounter, + }, + true, + "feature journaling requires rbd-nbd for mounter", }, { "layering,exclusive-loc,journaling", From 73ecf06f97810fbf836d76a5d47244b4fff2aef0 Mon Sep 17 00:00:00 2001 From: Humble Chirammal Date: Mon, 6 Dec 2021 15:58:08 +0530 Subject: [PATCH 2/5] ci: rename golangci linter github action file to proper name Signed-off-by: Humble Chirammal --- .github/workflows/{golanci-lint.yaml => golangci-lint.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{golanci-lint.yaml => golangci-lint.yaml} (100%) diff --git a/.github/workflows/golanci-lint.yaml b/.github/workflows/golangci-lint.yaml similarity index 100% rename from .github/workflows/golanci-lint.yaml rename to .github/workflows/golangci-lint.yaml From d943fbd265adba5c2626f45864137ea90a5f75fa Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Wed, 1 Dec 2021 13:06:53 +0530 Subject: [PATCH 3/5] e2e: run generic ephemeral for kubernetes 1.21+ Currently, we are skipping the generic ephemeral testing if the kubernetes version is less than 1.21 because of this one the who test suite is getting skipped and e2e is marked as success in 2 minutes. This commit runs the ephemeral tests if the kube=>1.21+. If we do this, for the lower version we can run other tests. Signed-off-by: Madhu Rajanna --- e2e/cephfs.go | 53 +++++++++++++++++++++++++-------------------------- e2e/rbd.go | 49 +++++++++++++++++++++++------------------------ 2 files changed, 50 insertions(+), 52 deletions(-) diff --git a/e2e/cephfs.go b/e2e/cephfs.go index b340969da..452b480e4 100644 --- a/e2e/cephfs.go +++ b/e2e/cephfs.go @@ -315,33 +315,32 @@ var _ = Describe("cephfs", func() { } By("verify generic ephemeral volume support", func() { // generic ephemeral volume support is beta since v1.21. - if !k8sVersionGreaterEquals(f.ClientSet, 1, 21) { - Skip("generic ephemeral volume only supported from v1.21+") - } - err := createCephfsStorageClass(f.ClientSet, f, true, nil) - if err != nil { - e2elog.Failf("failed to create CephFS storageclass: %v", err) - } - // create application - app, err := loadApp(appEphemeralPath) - if err != nil { - e2elog.Failf("failed to load application: %v", err) - } - app.Namespace = f.UniqueName - err = createApp(f.ClientSet, app, deployTimeout) - if err != nil { - e2elog.Failf("failed to create application: %v", err) - } - validateSubvolumeCount(f, 1, fileSystemName, subvolumegroup) - // delete pod - err = deletePod(app.Name, app.Namespace, f.ClientSet, deployTimeout) - if err != nil { - e2elog.Failf("failed to delete application: %v", err) - } - validateSubvolumeCount(f, 0, fileSystemName, subvolumegroup) - err = deleteResource(cephFSExamplePath + "storageclass.yaml") - if err != nil { - e2elog.Failf("failed to delete CephFS storageclass: %v", err) + if k8sVersionGreaterEquals(f.ClientSet, 1, 21) { + err := createCephfsStorageClass(f.ClientSet, f, true, nil) + if err != nil { + e2elog.Failf("failed to create CephFS storageclass: %v", err) + } + // create application + app, err := loadApp(appEphemeralPath) + if err != nil { + e2elog.Failf("failed to load application: %v", err) + } + app.Namespace = f.UniqueName + err = createApp(f.ClientSet, app, deployTimeout) + if err != nil { + e2elog.Failf("failed to create application: %v", err) + } + validateSubvolumeCount(f, 1, fileSystemName, subvolumegroup) + // delete pod + err = deletePod(app.Name, app.Namespace, f.ClientSet, deployTimeout) + if err != nil { + e2elog.Failf("failed to delete application: %v", err) + } + validateSubvolumeCount(f, 0, fileSystemName, subvolumegroup) + err = deleteResource(cephFSExamplePath + "storageclass.yaml") + if err != nil { + e2elog.Failf("failed to delete CephFS storageclass: %v", err) + } } }) diff --git a/e2e/rbd.go b/e2e/rbd.go index a7cc6d4f5..27f1931ea 100644 --- a/e2e/rbd.go +++ b/e2e/rbd.go @@ -373,31 +373,30 @@ var _ = Describe("RBD", func() { } By("verify generic ephemeral volume support", func() { // generic ephemeral volume support is supported from 1.21 - if !k8sVersionGreaterEquals(f.ClientSet, 1, 21) { - Skip("generic ephemeral volume only supported from v1.21+") - } - // create application - app, err := loadApp(appEphemeralPath) - if err != nil { - e2elog.Failf("failed to load application: %v", err) - } - app.Namespace = f.UniqueName - err = createApp(f.ClientSet, app, deployTimeout) - if err != nil { - e2elog.Failf("failed to create application: %v", err) - } - // validate created backend rbd images - validateRBDImageCount(f, 1, defaultRBDPool) - err = deletePod(app.Name, app.Namespace, f.ClientSet, deployTimeout) - if err != nil { - e2elog.Failf("failed to delete application: %v", err) - } - // validate created backend rbd images - validateRBDImageCount(f, 0, defaultRBDPool) - // validate images in trash - err = waitToRemoveImagesFromTrash(f, defaultRBDPool, deployTimeout) - if err != nil { - e2elog.Failf("failed to validate rbd images in pool %s trash: %v", defaultRBDPool, err) + if k8sVersionGreaterEquals(f.ClientSet, 1, 21) { + // create application + app, err := loadApp(appEphemeralPath) + if err != nil { + e2elog.Failf("failed to load application: %v", err) + } + app.Namespace = f.UniqueName + err = createApp(f.ClientSet, app, deployTimeout) + if err != nil { + e2elog.Failf("failed to create application: %v", err) + } + // validate created backend rbd images + validateRBDImageCount(f, 1, defaultRBDPool) + err = deletePod(app.Name, app.Namespace, f.ClientSet, deployTimeout) + if err != nil { + e2elog.Failf("failed to delete application: %v", err) + } + // validate created backend rbd images + validateRBDImageCount(f, 0, defaultRBDPool) + // validate images in trash + err = waitToRemoveImagesFromTrash(f, defaultRBDPool, deployTimeout) + if err != nil { + e2elog.Failf("failed to validate rbd images in pool %s trash: %v", defaultRBDPool, err) + } } }) From 9a4533e5493668bd95cfd44d6aa0aad795885567 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Wed, 1 Dec 2021 15:41:32 +0530 Subject: [PATCH 4/5] rbd: create 1MiB size dummy image we added a workaround for rbd scheduling by creating a dummy image in #2656. with the fix we are creating a dummy image of the size of the first actual rbd image which is sent in EnableVolumeReplication request if the actual rbd image size is 1TiB we are creating a dummy image of 1TiB which is not good. even though its a thin provisioned rbd images this is causing issue for the transfer of the snapshot during the mirroring operation. This commit recreates the rbd image with 1MiB size which is the smaller supported size in rbd. Signed-off-by: Madhu Rajanna --- internal/rbd/replicationcontrollerserver.go | 33 +++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/internal/rbd/replicationcontrollerserver.go b/internal/rbd/replicationcontrollerserver.go index 11a8bd906..8387f776c 100644 --- a/internal/rbd/replicationcontrollerserver.go +++ b/internal/rbd/replicationcontrollerserver.go @@ -300,23 +300,46 @@ func getOperationName(poolName string, optName operation) string { // createDummyImage creates a dummy image as a workaround for the rbd // scheduling problem. func createDummyImage(ctx context.Context, rbdVol *rbdVolume) error { + var err error + var imgName string + + dummyImageOpsLock.Lock() + defer dummyImageOpsLock.Unlock() optName := getOperationName(rbdVol.Pool, dummyImageCreated) if _, ok := operationLock.Load(optName); !ok { // create a dummy image - imgName, err := getDummyImageName(rbdVol.conn) + imgName, err = getDummyImageName(rbdVol.conn) if err != nil { return err } dummyVol := *rbdVol dummyVol.RbdImageName = imgName + // create 1MiB dummy image. 1MiB=1048576 bytes + dummyVol.VolSize = 1048576 err = createImage(ctx, &dummyVol, dummyVol.conn.Creds) - if err != nil && !strings.Contains(err.Error(), "File exists") { - return err + if err != nil { + if strings.Contains(err.Error(), "File exists") { + err = repairDummyImage(ctx, &dummyVol) + } + } + if err == nil { + operationLock.Store(optName, true) } - operationLock.Store(optName, true) } - return nil + return err +} + +// repairDummyImage deletes and recreates the dummy image. +func repairDummyImage(ctx context.Context, dummyVol *rbdVolume) error { + // deleting and recreating the dummy image will not impact anything as its + // a workaround to fix the scheduling problem. + err := deleteImage(ctx, dummyVol, dummyVol.conn.Creds) + if err != nil { + return err + } + + return createImage(ctx, dummyVol, dummyVol.conn.Creds) } // tickleMirroringOnDummyImage disables and reenables mirroring on the dummy image, and sets a From 8081ac82513c4e9b02dd18d99e0b4a2b3b8334cb Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Tue, 7 Dec 2021 11:15:28 +0530 Subject: [PATCH 5/5] rbd: add new image features for dummy image The dummy image will be created with 1Mib size. during the snapshot transfer operation the 1Mib will be transferred even if the dummy image doesnot contains any data. adding the new image features `fast-diff,layering,obj-map,exclusive-lock`on the dummy image will ensure that only the diff is transferred to the remote cluster. Signed-off-by: Madhu Rajanna --- internal/rbd/replicationcontrollerserver.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/internal/rbd/replicationcontrollerserver.go b/internal/rbd/replicationcontrollerserver.go index 8387f776c..e99db7847 100644 --- a/internal/rbd/replicationcontrollerserver.go +++ b/internal/rbd/replicationcontrollerserver.go @@ -314,6 +314,14 @@ func createDummyImage(ctx context.Context, rbdVol *rbdVolume) error { } dummyVol := *rbdVol dummyVol.RbdImageName = imgName + f := []string{ + librbd.FeatureNameLayering, + librbd.FeatureNameObjectMap, + librbd.FeatureNameExclusiveLock, + librbd.FeatureNameFastDiff, + } + features := librbd.FeatureSetFromNames(f) + dummyVol.imageFeatureSet = features // create 1MiB dummy image. 1MiB=1048576 bytes dummyVol.VolSize = 1048576 err = createImage(ctx, &dummyVol, dummyVol.conn.Creds) @@ -332,6 +340,10 @@ func createDummyImage(ctx context.Context, rbdVol *rbdVolume) error { // repairDummyImage deletes and recreates the dummy image. func repairDummyImage(ctx context.Context, dummyVol *rbdVolume) error { + // instead of checking the images features and than adding missing image + // features, updating the image size to 1Mib. We will delete the image + // and recreate it. + // deleting and recreating the dummy image will not impact anything as its // a workaround to fix the scheduling problem. err := deleteImage(ctx, dummyVol, dummyVol.conn.Creds)