From 4c2d2caf9fa60f14ae6d7fcd4cc4254b76013362 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Thu, 18 Apr 2024 14:02:29 +0200 Subject: [PATCH] util: add support to configure mirror daemon count Currently we assume that only one rbd mirror daemon is running on the ceph cluster, but that is not true in many cases and there can be more than one; this PR makes the count a configurable parameter. fixes: #4312 Signed-off-by: Madhu Rajanna --- charts/ceph-csi-rbd/values.yaml | 1 + deploy/csi-config-map-sample.yaml | 3 + internal/rbd/rbd_util.go | 6 +- internal/util/csiconfig.go | 17 ++++ internal/util/csiconfig_test.go | 88 +++++++++++++++++++ .../api/deploy/kubernetes/csi-config-map.go | 2 + 6 files changed, 116 insertions(+), 1 deletion(-) diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index 0a8195e74..a3821ff7b 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -27,6 +27,7 @@ serviceAccounts: # - "" # rbd: # netNamespaceFilePath: "{{ .kubeletDir }}/plugins/{{ .driverName }}/net" +# mirrorDaemonCount: 1 # readAffinity: # enabled: true # crushLocationLabels: diff --git a/deploy/csi-config-map-sample.yaml b/deploy/csi-config-map-sample.yaml index d44f96905..e0263a0d8 100644 --- a/deploy/csi-config-map-sample.yaml +++ b/deploy/csi-config-map-sample.yaml @@ -19,6 +19,8 @@ kind: ConfigMap # NOTE: The given radosNamespace must already exists in the pool. # NOTE: Make sure you don't add radosNamespace option to a currently in use # configuration as it will cause issues. +# The "rbd.mirrorDaemonCount" is optional and represents the total number of +# RBD mirror daemons running on the ceph cluster. # The field "cephFS.subvolumeGroup" is optional and defaults to "csi". # NOTE: The given subvolumeGroup must already exist in the filesystem. 
# The "cephFS.netNamespaceFilePath" fields are the various network namespace @@ -64,6 +66,7 @@ data: "rbd": { "netNamespaceFilePath": "/plugins/rbd.csi.ceph.com/net", "radosNamespace": "", + "mirrorDaemonCount": 1, }, "monitors": [ "", diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index e38528932..fa3f68a77 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -553,9 +553,13 @@ func (ri *rbdImage) isInUse() (bool, error) { // because we opened the image, there is at least one watcher defaultWatchers := 1 if mirrorInfo.Primary { + count, err := util.GetRBDMirrorDaemonCount(util.CsiConfigFile, ri.ClusterID) + if err != nil { + return false, err + } // if rbd mirror daemon is running, a watcher will be added by the rbd // mirror daemon for mirrored images. - defaultWatchers++ + defaultWatchers += count } return len(watchers) > defaultWatchers, nil diff --git a/internal/util/csiconfig.go b/internal/util/csiconfig.go index 0982172d8..6d3e7109a 100644 --- a/internal/util/csiconfig.go +++ b/internal/util/csiconfig.go @@ -45,6 +45,7 @@ const ( "clusterID": "", "rbd": { "radosNamespace": "" + "mirrorDaemonCount": 1 }, "monitors": [ "", @@ -105,6 +106,22 @@ func GetRadosNamespace(pathToConfig, clusterID string) (string, error) { return cluster.RBD.RadosNamespace, nil } +// GetRBDMirrorDaemonCount returns the number of RBD mirror daemons configured +// for the given clusterID. +func GetRBDMirrorDaemonCount(pathToConfig, clusterID string) (int, error) { + cluster, err := readClusterInfo(pathToConfig, clusterID) + if err != nil { + return 0, err + } + + // if it is empty, set the default to 1 which is most common in a cluster. + if cluster.RBD.MirrorDaemonCount == 0 { + return 1, nil + } + + return cluster.RBD.MirrorDaemonCount, nil +} + // CephFSSubvolumeGroup returns the subvolumeGroup for CephFS volumes. If not set, it returns the default value "csi". 
func CephFSSubvolumeGroup(pathToConfig, clusterID string) (string, error) { cluster, err := readClusterInfo(pathToConfig, clusterID) diff --git a/internal/util/csiconfig_test.go b/internal/util/csiconfig_test.go index 8da099076..c616d4c3a 100644 --- a/internal/util/csiconfig_test.go +++ b/internal/util/csiconfig_test.go @@ -17,11 +17,14 @@ limitations under the License. package util import ( + "bytes" "encoding/json" "os" "testing" cephcsi "github.com/ceph/ceph-csi/api/deploy/kubernetes" + + "github.com/stretchr/testify/require" ) var ( @@ -530,3 +533,88 @@ func TestGetCephFSMountOptions(t *testing.T) { }) } } + +func TestGetRBDMirrorDaemonCount(t *testing.T) { + t.Parallel() + tests := []struct { + name string + clusterID string + want int + }{ + { + name: "get rbd mirror daemon count for cluster-1", + clusterID: "cluster-1", + want: 2, + }, + { + name: "get rbd mirror daemon count for cluster-2", + clusterID: "cluster-2", + want: 4, + }, + { + name: "when rbd mirror daemon count is empty", + clusterID: "cluster-3", + want: 1, // default mirror daemon count + }, + } + + csiConfig := []cephcsi.ClusterInfo{ + { + ClusterID: "cluster-1", + Monitors: []string{"ip-1", "ip-2"}, + RBD: cephcsi.RBD{ + MirrorDaemonCount: 2, + }, + }, + { + ClusterID: "cluster-2", + Monitors: []string{"ip-3", "ip-4"}, + RBD: cephcsi.RBD{ + MirrorDaemonCount: 4, + }, + }, + { + ClusterID: "cluster-3", + Monitors: []string{"ip-5", "ip-6"}, + }, + } + csiConfigFileContent, err := json.Marshal(csiConfig) + if err != nil { + t.Errorf("failed to marshal csi config info %v", err) + } + tmpConfPath := t.TempDir() + "/ceph-csi.json" + err = os.WriteFile(tmpConfPath, csiConfigFileContent, 0o600) + if err != nil { + t.Errorf("failed to write %s file content: %v", CsiConfigFile, err) + } + for _, tt := range tests { + ts := tt + t.Run(ts.name, func(t *testing.T) { + t.Parallel() + var got int + got, err = GetRBDMirrorDaemonCount(tmpConfPath, ts.clusterID) + if err != nil { + 
t.Errorf("GetRBDMirrorDaemonCount() error = %v", err) + + return + } + if got != ts.want { + t.Errorf("GetRBDMirrorDaemonCount() = %v, want %v", got, ts.want) + } + }) + } + + // when mirrorDaemonCount is set as string + csiConfigFileContent = bytes.Replace( + csiConfigFileContent, + []byte(`"mirrorDaemonCount":2`), + []byte(`"mirrorDaemonCount":"2"`), + 1) + tmpCSIConfPath := t.TempDir() + "/ceph-csi.json" + err = os.WriteFile(tmpCSIConfPath, csiConfigFileContent, 0o600) + if err != nil { + t.Errorf("failed to write %s file content: %v", CsiConfigFile, err) + } + _, err = GetRBDMirrorDaemonCount(tmpCSIConfPath, "test") + require.Error(t, err) +} diff --git a/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go b/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go index 4d237b3c5..0c418e23d 100644 --- a/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go +++ b/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go @@ -46,6 +46,8 @@ type RBD struct { NetNamespaceFilePath string `json:"netNamespaceFilePath"` // RadosNamespace is a rados namespace in the pool RadosNamespace string `json:"radosNamespace"` + // RBD mirror daemons running in the ceph cluster. + MirrorDaemonCount int `json:"mirrorDaemonCount"` } type NFS struct {