From caf409065751b16cea98ff99fba39caeb70cc589 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Tue, 12 Apr 2022 09:33:00 +0530 Subject: [PATCH 1/6] rbd: provide option to disable setting metadata on rbd images As we added support to set the metadata on the rbd images created for the PVC and volume snapshot, by default metadata is set on all the images. As we have seen we are hitting issues#2327 a lot of times with this, we start to leave a lot of stale images. Currently, we rely on `--extra-create-metadata=true` to decide to set the metadata or not, we cannot set this option to false to disable setting metadata because we use this for encryption too. This changes is to provide an option to disable setting the image metadata when starting cephcsi. Fixes: #3009 Signed-off-by: Prasanna Kumar Kalever --- cmd/cephcsi.go | 2 ++ internal/controller/controller.go | 1 + .../controller/persistentvolume/persistentvolume.go | 1 + internal/rbd/controllerserver.go | 7 +++++++ internal/rbd/driver/driver.go | 1 + internal/rbd/rbd_journal.go | 2 ++ internal/rbd/rbd_util.go | 10 ++++++++++ internal/util/util.go | 2 ++ 8 files changed, 26 insertions(+) diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go index db5239fe9..71b3ce6ea 100644 --- a/cmd/cephcsi.go +++ b/cmd/cephcsi.go @@ -69,6 +69,7 @@ func init() { flag.StringVar(&conf.PluginPath, "pluginpath", defaultPluginPath, "plugin path") flag.StringVar(&conf.StagingPath, "stagingpath", defaultStagingPath, "staging path") flag.StringVar(&conf.ClusterName, "clustername", "", "name of the cluster") + flag.BoolVar(&conf.SetMetadata, "setmetadata", false, "set metadata on the volume") flag.StringVar(&conf.InstanceID, "instanceid", "", "Unique ID distinguishing this instance of Ceph CSI among other"+ " instances, when sharing Ceph clusters across CSI instances for provisioning") flag.IntVar(&conf.PidLimit, "pidlimit", 0, "the PID limit to configure through cgroups") @@ -251,6 +252,7 @@ func main() { DriverName: dname, Namespace: conf.DriverNamespace, ClusterName: conf.ClusterName, + SetMetadata: conf.SetMetadata, } // initialize all controllers before starting. initControllers() diff --git a/internal/controller/controller.go b/internal/controller/controller.go index bf608ca12..7bc6e157d 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -38,6 +38,7 @@ type Config struct { DriverName string Namespace string ClusterName string + SetMetadata bool } // ControllerList holds the list of managers need to be started. diff --git a/internal/controller/persistentvolume/persistentvolume.go b/internal/controller/persistentvolume/persistentvolume.go index 5183fbb73..fe2ed5a60 100644 --- a/internal/controller/persistentvolume/persistentvolume.go +++ b/internal/controller/persistentvolume/persistentvolume.go @@ -185,6 +185,7 @@ func (r ReconcilePersistentVolume) reconcilePV(ctx context.Context, obj runtime. requestName, pvcNamespace, r.config.ClusterName, + r.config.SetMetadata, cr) if err != nil { log.ErrorLogMsg("failed to regenerate journal %s", err) diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index b67760ddb..b6a8f7e6a 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -55,6 +55,9 @@ type ControllerServer struct { // Cluster name ClusterName string + + // Set metadata on volume + SetMetadata bool } func (cs *ControllerServer) validateVolumeReq(ctx context.Context, req *csi.CreateVolumeRequest) error { @@ -173,7 +176,10 @@ func (cs *ControllerServer) parseVolCreateRequest( return nil, status.Error(codes.InvalidArgument, err.Error()) } + // set cluster name on volume rbdVol.ClusterName = cs.ClusterName + // set metadata on volume + rbdVol.EnableMetadata = cs.SetMetadata // if the KMS is of type VaultToken, additional metadata is needed // depending on the tenant, the KMS can be configured with other @@ -1061,6 +1067,7 @@ func (cs *ControllerServer) CreateSnapshot( return nil, err } + rbdVol.EnableMetadata = cs.SetMetadata // Check if source volume was created with required image features for snaps if !rbdVol.hasSnapshotFeature() { diff --git a/internal/rbd/driver/driver.go b/internal/rbd/driver/driver.go index a74cd2552..5019d02cc 100644 --- a/internal/rbd/driver/driver.go +++ b/internal/rbd/driver/driver.go @@ -162,6 +162,7 @@ func (r *Driver) Run(conf *util.Config) { if conf.IsControllerServer { r.cs = NewControllerServer(r.cd) r.cs.ClusterName = conf.ClusterName + r.cs.SetMetadata = conf.SetMetadata r.rs = NewReplicationServer(r.cs) } if !conf.IsControllerServer && !conf.IsNodeServer { diff --git a/internal/rbd/rbd_journal.go b/internal/rbd/rbd_journal.go index 9da5b3ad8..eadfa99e7 100644 --- a/internal/rbd/rbd_journal.go +++ b/internal/rbd/rbd_journal.go @@ -543,6 +543,7 @@ func RegenerateJournal( requestName, owner, clusterName string, + setMetadata bool, cr *util.Credentials, ) (string, error) { ctx := context.Background() @@ -557,6 +558,7 @@ func RegenerateJournal( rbdVol = &rbdVolume{} rbdVol.VolID = volumeID rbdVol.ClusterName = clusterName + rbdVol.EnableMetadata = setMetadata err = vi.DecomposeCSIID(rbdVol.VolID) if err != nil { diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 5f26b2579..59c26f187 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -132,6 +132,8 @@ type rbdImage struct { // Cluster name ClusterName string + // Set metadata on volume + EnableMetadata bool // encryption provides access to optional VolumeEncryption functions encryption *util.VolumeEncryption @@ -2061,6 +2063,10 @@ func genVolFromVolIDWithMigration( // setAllMetadata set all the metadata from arg parameters on RBD image. func (rv *rbdVolume) setAllMetadata(parameters map[string]string) error { + if !rv.EnableMetadata { + return nil + } + for k, v := range parameters { err := rv.SetMetadata(k, v) if err != nil { @@ -2081,6 +2087,10 @@ func (rv *rbdVolume) setAllMetadata(parameters map[string]string) error { // unsetAllMetadata unset all the metadata from arg keys on RBD image. func (rv *rbdVolume) unsetAllMetadata(keys []string) error { + if !rv.EnableMetadata { + return nil + } + for _, key := range keys { err := rv.RemoveMetadata(key) // TODO: replace string comparison with errno. diff --git a/internal/util/util.go b/internal/util/util.go index f8d81a3d4..b9d7ea173 100644 --- a/internal/util/util.go +++ b/internal/util/util.go @@ -101,6 +101,8 @@ type Config struct { // cephfs related flags ForceKernelCephFS bool // force to use the ceph kernel client even if the kernel is < 4.17 + SetMetadata bool // set metadata on the volume + // RbdHardMaxCloneDepth is the hard limit for maximum number of nested volume clones that are taken before a flatten // occurs RbdHardMaxCloneDepth uint From dc738b96b4fe6346bf51588249696b2ca423a296 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Tue, 22 Feb 2022 11:29:11 +0530 Subject: [PATCH 2/6] deploy: add setmetadata=true in the templates setmetadata on the volume by default, otherwise e2e will fail Signed-off-by: Prasanna Kumar Kalever --- charts/ceph-csi-rbd/templates/provisioner-deployment.yaml | 2 ++ charts/ceph-csi-rbd/values.yaml | 3 +++ deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml | 2 ++ 3 files changed, 7 insertions(+) diff --git a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml index d38203af6..540b8add8 100644 --- a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml +++ b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml @@ -155,6 +155,7 @@ spec: {{- if .Values.provisioner.clustername }} - "--clustername={{ .Values.provisioner.clustername }}" {{- end }} + - "--setmetadata={{ .Values.provisioner.setmetadata }}" env: - name: POD_IP valueFrom: @@ -205,6 +206,7 @@ spec: {{- if .Values.provisioner.clustername }} - "--clustername={{ .Values.provisioner.clustername }}" {{- end }} + - "--setmetadata={{ .Values.provisioner.setmetadata }}" env: - name: DRIVER_NAMESPACE valueFrom: diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index d48af1c32..8c67572e6 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -210,6 +210,9 @@ provisioner: pullPolicy: IfNotPresent resources: {} + # set metadata on volume + setmetadata: true + attacher: name: attacher enabled: true diff --git a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml index b3877bb46..9acc3f9d9 100644 --- a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml +++ b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml @@ -131,6 +131,7 @@ spec: - "--rbdhardmaxclonedepth=8" - "--rbdsoftmaxclonedepth=4" - "--enableprofiling=false" + - "--setmetadata=true" env: - name: POD_IP valueFrom: @@ -180,6 +181,7 @@ spec: - "--v=5" - "--drivername=rbd.csi.ceph.com" - "--drivernamespace=$(DRIVER_NAMESPACE)" + - "--setmetadata=true" env: - name: DRIVER_NAMESPACE valueFrom: From af0bdaf2cb52a54cccd3323cdfd254e5b015951f Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Tue, 22 Feb 2022 16:48:24 +0530 Subject: [PATCH 3/6] doc: Add documentation about `--setmetadata` option Fixes: #2874 Signed-off-by: Prasanna Kumar Kalever --- charts/ceph-csi-rbd/README.md | 1 + docs/deploy-rbd.md | 1 + 2 files changed, 2 insertions(+) diff --git a/charts/ceph-csi-rbd/README.md b/charts/ceph-csi-rbd/README.md index 9ffc7c65f..9e5b413e6 100644 --- a/charts/ceph-csi-rbd/README.md +++ b/charts/ceph-csi-rbd/README.md @@ -112,6 +112,7 @@ charts and their default values. | `provisioner.skipForceFlatten` | Skip image flattening if kernel support mapping of rbd images which has the deep-flatten feature | `false` | | `provisioner.timeout` | GRPC timeout for waiting for creation or deletion of a volume | `60s` | | `provisioner.clustername` | Cluster name to set on the RBD image | "" | +| `provisioner.setmetadata` | Set metadata on volume | `true` | | `provisioner.priorityClassName` | Set user created priorityclassName for csi provisioner pods. Default is `system-cluster-critical` which is less priority than `system-node-critical` | `system-cluster-critical` | | `provisioner.profiling.enabled` | Specifies whether profiling should be enabled | `false` | | `provisioner.provisioner.image.repository` | Specifies the csi-provisioner image repository URL | `registry.k8s.io/sig-storage/csi-provisioner` | diff --git a/docs/deploy-rbd.md b/docs/deploy-rbd.md index bdbaf101f..cc5f3c6b9 100644 --- a/docs/deploy-rbd.md +++ b/docs/deploy-rbd.md @@ -47,6 +47,7 @@ make image-cephcsi | `--rbdsoftmaxclonedepth` | `4` | Soft limit for maximum number of nested volume clones that are taken before a flatten occurs | | `--skipforceflatten` | `false` | skip image flattening on kernel < 5.2 which support mapping of rbd images which has the deep-flatten feature | | `--maxsnapshotsonimage` | `450` | Maximum number of snapshots allowed on rbd image without flattening | +| `--setmetadata` | `false` | Set metadata on volume | **Available volume parameters:** From 29a3f4acf6074b81136951df92cf468c190d9da9 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Thu, 16 Jun 2022 16:21:49 +0530 Subject: [PATCH 4/6] cleanup: ReconcilePersistentVolume consider passing it by pointer Address: hugeParam linter internal/controller/persistentvolume/persistentvolume.go:59:7: hugeParam: r is heavy (80 bytes); consider passing it by pointer (gocritic) [...] internal/controller/persistentvolume/persistentvolume.go:135:7: hugeParam: r is heavy (80 bytes); consider passing it by pointer (gocritic) func (r ReconcilePersistentVolume) reconcilePV(ctx context.Context, obj runtime.Object) error {} Signed-off-by: Prasanna Kumar Kalever --- internal/controller/persistentvolume/persistentvolume.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/controller/persistentvolume/persistentvolume.go b/internal/controller/persistentvolume/persistentvolume.go index fe2ed5a60..96c21e150 100644 --- a/internal/controller/persistentvolume/persistentvolume.go +++ b/internal/controller/persistentvolume/persistentvolume.go @@ -52,11 +52,11 @@ var ( // Init will add the ReconcilePersistentVolume to the list. func Init() { // add ReconcilePersistentVolume to the list - ctrl.ControllerList = append(ctrl.ControllerList, ReconcilePersistentVolume{}) + ctrl.ControllerList = append(ctrl.ControllerList, &ReconcilePersistentVolume{}) } // Add adds the newPVReconciler. -func (r ReconcilePersistentVolume) Add(mgr manager.Manager, config ctrl.Config) error { +func (r *ReconcilePersistentVolume) Add(mgr manager.Manager, config ctrl.Config) error { return add(mgr, newPVReconciler(mgr, config)) } @@ -132,7 +132,7 @@ func checkStaticVolume(pv *corev1.PersistentVolume) bool { // reconcilePV will extract the image details from the pv spec and regenerates // the omap data. -func (r ReconcilePersistentVolume) reconcilePV(ctx context.Context, obj runtime.Object) error { +func (r *ReconcilePersistentVolume) reconcilePV(ctx context.Context, obj runtime.Object) error { pv, ok := obj.(*corev1.PersistentVolume) if !ok { return nil From 9fa3c8382b0dab0c2eaed27dc9c4dbe84a3642cf Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Thu, 16 Jun 2022 16:28:31 +0530 Subject: [PATCH 5/6] cleanup: reduce struct padding internal/rbd/rbd_util.go:89:15: struct of size 312 bytes could be of size 304 bytes: `` struct{ RbdImageName string, ImageID string, VolID string, Monitors string, JournalPool string, Pool string, RadosNamespace string, ClusterID string, RequestName string, NamePrefix string, ParentName string, ParentPool string, ClusterName string, Owner string, VolSize int64, StripeCount uint64, StripeUnit uint64, ObjectSize uint64, ImageFeatureSet github.com/ceph/go-ceph/rbd.FeatureSet, encryption *github.com/ceph/ceph-csi/internal/util.VolumeEncryption, CreatedAt *google.golang.org/protobuf/types/known/timestamppb.Timestamp, conn *github.com/ceph/ceph-csi/internal/util.ClusterConnection, ioctx *github.com/ceph/go-ceph/rados.IOContext, Primary bool, EnableMetadata bool, } `` (maligned) type rbdImage struct { ^}` make: *** [Makefile:118: go-lint] Error 1 Signed-off-by: Prasanna Kumar Kalever --- internal/rbd/rbd_util.go | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 59c26f187..e5ece5658 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -95,16 +95,7 @@ type rbdImage struct { ImageID string // VolID is the volume ID that is exchanged with CSI drivers, // identifying this rbd image - VolID string `json:"volID"` - - // VolSize is the size of the RBD image backing this rbdImage. - VolSize int64 - - // image striping configurations. - StripeCount uint64 - StripeUnit uint64 - ObjectSize uint64 - + VolID string `json:"volID"` Monitors string // JournalPool is the ceph pool in which the CSI Journal/CSI snapshot Journal is // stored @@ -121,31 +112,36 @@ type rbdImage struct { // config maps in v1.0.0 RequestName string NamePrefix string - // ParentName represents the parent image name of the image. ParentName string // Parent Pool is the pool that contains the parent image. - ParentPool string - ImageFeatureSet librbd.FeatureSet - // Primary represent if the image is primary or not. - Primary bool - + ParentPool string // Cluster name ClusterName string - // Set metadata on volume - EnableMetadata bool - - // encryption provides access to optional VolumeEncryption functions - encryption *util.VolumeEncryption // Owner is the creator (tenant, Kubernetes Namespace) of the volume Owner string - CreatedAt *timestamp.Timestamp + // VolSize is the size of the RBD image backing this rbdImage. + VolSize int64 + // image striping configurations. + StripeCount uint64 + StripeUnit uint64 + ObjectSize uint64 + + ImageFeatureSet librbd.FeatureSet + // encryption provides access to optional VolumeEncryption functions + encryption *util.VolumeEncryption + CreatedAt *timestamp.Timestamp // conn is a connection to the Ceph cluster obtained from a ConnPool conn *util.ClusterConnection // an opened IOContext, call .openIoctx() before using ioctx *rados.IOContext + + // Primary represent if the image is primary or not. + Primary bool + // Set metadata on volume + EnableMetadata bool } // rbdVolume represents a CSI volume and its RBD image specifics. From b56511c0c8110e2c94a30a57d572497c09034820 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Wed, 29 Jun 2022 11:01:07 +0530 Subject: [PATCH 6/6] e2e: reduce defaultCloneCount to 3 CI is failing very frequently hitting resource leaks issue, until we solve the root cause for resource leaks reducing the clone count from 10 to 3. related: #2327 Signed-off-by: Prasanna Kumar Kalever --- e2e/rbd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/rbd.go b/e2e/rbd.go index e222bb291..cafff25b9 100644 --- a/e2e/rbd.go +++ b/e2e/rbd.go @@ -85,7 +85,7 @@ var ( snapshotPath = rbdExamplePath + "snapshot.yaml" deployFSAppPath = e2eTemplatesPath + "rbd-fs-deployment.yaml" deployBlockAppPath = e2eTemplatesPath + "rbd-block-deployment.yaml" - defaultCloneCount = 10 + defaultCloneCount = 3 // TODO: set to 10 once issues#2327 is fixed nbdMapOptions = "nbd:debug-rbd=20" e2eDefaultCephLogStrategy = "preserve"