diff --git a/.github/workflows/dependency-review.yaml b/.github/workflows/dependency-review.yaml index e05efaffd..09bce8cad 100644 --- a/.github/workflows/dependency-review.yaml +++ b/.github/workflows/dependency-review.yaml @@ -17,4 +17,4 @@ jobs: - name: 'Checkout Repository' uses: actions/checkout@v3 - name: 'Dependency Review' - uses: actions/dependency-review-action@v1 + uses: actions/dependency-review-action@v2 diff --git a/actions/retest/main.go b/actions/retest/main.go index f019f333f..26ef0b610 100644 --- a/actions/retest/main.go +++ b/actions/retest/main.go @@ -116,7 +116,7 @@ func main() { log.Fatalf("failed to list pull requests %v\n", err) } for _, re := range req { - if *re.State == "open" { + if (re.State != nil) && (*re.State == "open") { prNumber := re.GetNumber() log.Printf("PR with ID %d with Title %q is open\n", prNumber, re.GetTitle()) for _, l := range re.Labels { @@ -147,6 +147,19 @@ func main() { log.Printf("found context %s with status %s\n", r.GetContext(), r.GetState()) if contains([]string{"failed", "failure"}, r.GetState()) { log.Printf("found failed test %s\n", r.GetContext()) + failedTestFound = true + // rebase the pr if it is behind the devel branch. + if (re.MergeableState != nil) && (*re.MergeableState == "BEHIND") { + comment := &github.IssueComment{ + Body: github.String("@mergifyio rebase"), + } + _, _, err := c.client.Issues.CreateComment(context.TODO(), c.owner, c.repo, prNumber, comment) + if err != nil { + log.Printf("failed to create comment %v\n", err) + } + break + } + // check if retest limit is reached msg := fmt.Sprintf("/retest %s", r.GetContext()) ok, err := c.checkRetestLimitReached(prNumber, msg) @@ -175,7 +188,6 @@ func main() { log.Printf("failed to create comment %v\n", err) continue } - failedTestFound = true } } @@ -188,8 +200,10 @@ func main() { _, _, err = c.client.Issues.CreateComment(context.TODO(), c.owner, c.repo, prNumber, comment) if err != nil { log.Printf("failed to create comment %q: %v\n", msg, err) - continue } + // exit after adding retests to a pr once to avoid retesting multiple prs + // at the same time. + break } } } diff --git a/build.env b/build.env index 142d5693a..87a7ee343 100644 --- a/build.env +++ b/build.env @@ -48,10 +48,10 @@ ROOK_VERSION=v1.9.4 ROOK_CEPH_CLUSTER_IMAGE=quay.io/ceph/ceph:v17 # CSI sidecar version -CSI_ATTACHER_VERSION=v3.4.0 +CSI_ATTACHER_VERSION=v3.5.0 CSI_SNAPSHOTTER_VERSION=v6.0.1 CSI_PROVISIONER_VERSION=v3.1.0 -CSI_RESIZER_VERSION=v1.4.0 +CSI_RESIZER_VERSION=v1.5.0 CSI_NODE_DRIVER_REGISTRAR_VERSION=v2.5.1 # e2e settings diff --git a/charts/ceph-csi-cephfs/README.md b/charts/ceph-csi-cephfs/README.md index 60aa16be6..604e8be53 100644 --- a/charts/ceph-csi-cephfs/README.md +++ b/charts/ceph-csi-cephfs/README.md @@ -109,7 +109,7 @@ charts and their default values. | `provisioner.provisioner.image.tag` | Specifies image tag | `v3.1.0` | | `provisioner.provisioner.image.pullPolicy` | Specifies pull policy | `IfNotPresent` | | `provisioner.resizer.image.repository` | Specifies the csi-resizer image repository URL | `registry.k8s.io/sig-storage/csi-resizer` | -| `provisioner.resizer.image.tag` | Specifies image tag | `v1.4.0` | +| `provisioner.resizer.image.tag` | Specifies image tag | `v1.5.0` | | `provisioner.resizer.image.pullPolicy` | Specifies pull policy | `IfNotPresent` | | `provisioner.resizer.name` | Specifies the name of csi-resizer sidecar | `resizer` | | `provisioner.resizer.enabled` | Specifies whether resizer sidecar is enabled | `true` | diff --git a/charts/ceph-csi-cephfs/values.yaml b/charts/ceph-csi-cephfs/values.yaml index 59902b5e2..b21871a09 100644 --- a/charts/ceph-csi-cephfs/values.yaml +++ b/charts/ceph-csi-cephfs/values.yaml @@ -171,7 +171,7 @@ provisioner: enabled: true image: repository: registry.k8s.io/sig-storage/csi-resizer - tag: v1.4.0 + tag: v1.5.0 pullPolicy: IfNotPresent resources: {} diff --git a/charts/ceph-csi-rbd/README.md b/charts/ceph-csi-rbd/README.md index bb654e532..9ffc7c65f 100644 --- a/charts/ceph-csi-rbd/README.md +++ b/charts/ceph-csi-rbd/README.md @@ -118,12 +118,12 @@ charts and their default values. | `provisioner.provisioner.image.tag` | Specifies image tag | `canary` | | `provisioner.provisioner.image.pullPolicy` | Specifies pull policy | `IfNotPresent` | | `provisioner.attacher.image.repository` | Specifies the csi-attacher image repository URL | `registry.k8s.io/sig-storage/csi-attacher` | -| `provisioner.attacher.image.tag` | Specifies image tag | `v3.4.0` | +| `provisioner.attacher.image.tag` | Specifies image tag | `v3.5.0` | | `provisioner.attacher.image.pullPolicy` | Specifies pull policy | `IfNotPresent` | | `provisioner.attacher.name` | Specifies the name of csi-attacher sidecar | `attacher` | | `provisioner.attacher.enabled` | Specifies whether attacher sidecar is enabled | `true` | | `provisioner.resizer.image.repository` | Specifies the csi-resizer image repository URL | `registry.k8s.io/sig-storage/csi-resizer` | -| `provisioner.resizer.image.tag` | Specifies image tag | `v1.4.0` | +| `provisioner.resizer.image.tag` | Specifies image tag | `v1.5.0` | | `provisioner.resizer.image.pullPolicy` | Specifies pull policy | `IfNotPresent` | | `provisioner.resizer.name` | Specifies the name of csi-resizer sidecar | `resizer` | | `provisioner.resizer.enabled` | Specifies whether resizer sidecar is enabled | `true` | diff --git a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml index decd6e78c..aad875423 100644 --- a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml +++ b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml @@ -67,7 +67,7 @@ spec: args: - "--nodeid=$(NODE_ID)" - "--pluginpath={{ .Values.kubeletDir }}/plugins" - - "--stagingpath={{ .Values.kubeletDir }}/plugins/kubernetes.io/csi/pv/" + - "--stagingpath={{ .Values.kubeletDir }}/plugins/kubernetes.io/csi/" - "--type=rbd" - "--nodeserver=true" - "--pidlimit=-1" diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index b18d3443e..d48af1c32 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -215,7 +215,7 @@ provisioner: enabled: true image: repository: registry.k8s.io/sig-storage/csi-attacher - tag: v3.4.0 + tag: v3.5.0 pullPolicy: IfNotPresent resources: {} @@ -224,7 +224,7 @@ provisioner: enabled: true image: repository: registry.k8s.io/sig-storage/csi-resizer - tag: v1.4.0 + tag: v1.5.0 pullPolicy: IfNotPresent resources: {} diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go index 480215de2..db5239fe9 100644 --- a/cmd/cephcsi.go +++ b/cmd/cephcsi.go @@ -54,7 +54,7 @@ const ( defaultNS = "default" defaultPluginPath = "/var/lib/kubelet/plugins" - defaultStagingPath = defaultPluginPath + "/kubernetes.io/csi/pv/" + defaultStagingPath = defaultPluginPath + "/kubernetes.io/csi/" ) var conf util.Config diff --git a/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml b/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml index ed2eeb46f..784494670 100644 --- a/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml +++ b/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml @@ -60,7 +60,7 @@ spec: - name: socket-dir mountPath: /csi - name: csi-resizer - image: registry.k8s.io/sig-storage/csi-resizer:v1.4.0 + image: registry.k8s.io/sig-storage/csi-resizer:v1.5.0 args: - "--csi-address=$(ADDRESS)" - "--v=5" diff --git a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml index b0fe29235..b3877bb46 100644 --- a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml +++ b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml @@ -86,7 +86,7 @@ spec: - name: socket-dir mountPath: /csi - name: csi-attacher - image: registry.k8s.io/sig-storage/csi-attacher:v3.4.0 + image: registry.k8s.io/sig-storage/csi-attacher:v3.5.0 args: - "--v=5" - "--csi-address=$(ADDRESS)" @@ -100,7 +100,7 @@ spec: - name: socket-dir mountPath: /csi - name: csi-resizer - image: registry.k8s.io/sig-storage/csi-resizer:v1.4.0 + image: registry.k8s.io/sig-storage/csi-resizer:v1.5.0 args: - "--csi-address=$(ADDRESS)" - "--v=5" diff --git a/deploy/rbd/kubernetes/csi-rbdplugin.yaml b/deploy/rbd/kubernetes/csi-rbdplugin.yaml index 68f8fa0ad..b9c894cee 100644 --- a/deploy/rbd/kubernetes/csi-rbdplugin.yaml +++ b/deploy/rbd/kubernetes/csi-rbdplugin.yaml @@ -55,7 +55,7 @@ spec: args: - "--nodeid=$(NODE_ID)" - "--pluginpath=/var/lib/kubelet/plugins" - - "--stagingpath=/var/lib/kubelet/plugins/kubernetes.io/csi/pv/" + - "--stagingpath=/var/lib/kubelet/plugins/kubernetes.io/csi/" - "--type=rbd" - "--nodeserver=true" - "--endpoint=$(CSI_ENDPOINT)" diff --git a/go.mod b/go.mod index d3f1d329f..d8407024f 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/hashicorp/vault/api v1.6.0 github.com/kubernetes-csi/csi-lib-utils v0.11.0 - github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.0 + github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.1 github.com/libopenstorage/secrets v0.0.0-20210908194121-a1d19aa9713a github.com/onsi/ginkgo v1.16.5 github.com/onsi/gomega v1.19.0 diff --git a/go.sum b/go.sum index 97bacec2d..03b802029 100644 --- a/go.sum +++ b/go.sum @@ -762,8 +762,8 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kubernetes-csi/csi-lib-utils v0.11.0 h1:FHWOBtAZBA/hVk7v/qaXgG9Sxv0/n06DebPFuDwumqg= github.com/kubernetes-csi/csi-lib-utils v0.11.0/go.mod h1:BmGZZB16L18+9+Lgg9YWwBKfNEHIDdgGfAyuW6p2NV0= github.com/kubernetes-csi/external-snapshotter/client/v4 v4.0.0/go.mod h1:YBCo4DoEeDndqvAn6eeu0vWM7QdXmHEeI9cFWplmBys= -github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.0 h1:05QzRsnbMQ1Ymg/7iJj1k6RVw+rgelB60Ud5j4GgmGM= -github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.0/go.mod h1:tnHiLn3P10N95fjn7O40QH5ovN0EFGAxqdTpUMrX6bU= +github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.1 h1:OqBS3UAo3eGWplYXoMLaWnx/7Zj5Ogh0VO/FuVOL+/o= +github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.1/go.mod h1:tnHiLn3P10N95fjn7O40QH5ovN0EFGAxqdTpUMrX6bU= github.com/layeh/radius v0.0.0-20190322222518-890bc1058917/go.mod h1:fywZKyu//X7iRzaxLgPWsvc0L26IUpVvE/aeIL2JtIQ= github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= diff --git a/internal/rbd/rbd_healer.go b/internal/rbd/rbd_healer.go index 91a54535b..3014f7ffc 100644 --- a/internal/rbd/rbd_healer.go +++ b/internal/rbd/rbd_healer.go @@ -18,6 +18,9 @@ package rbd import ( "context" + "crypto/sha256" + "fmt" + "path/filepath" "sync" "github.com/ceph/ceph-csi/internal/util" @@ -70,11 +73,39 @@ func getSecret(c *k8s.Clientset, ns, name string) (map[string]string, error) { return deviceSecret, nil } +// formatStagingTargetPath returns the path where the volume is expected to be +// mounted (or the block-device is attached/mapped). Different Kubernetes +// version use different paths. +func formatStagingTargetPath(c *k8s.Clientset, pv *v1.PersistentVolume, stagingPath string) (string, error) { + // Kubernetes 1.24+ uses a hash of the volume-id in the path name + unique := sha256.Sum256([]byte(pv.Spec.CSI.VolumeHandle)) + targetPath := filepath.Join(stagingPath, pv.Spec.CSI.Driver, fmt.Sprintf("%x", unique), "globalmount") + + major, minor, err := kubeclient.GetServerVersion(c) + if err != nil { + return "", fmt.Errorf("failed to get server version: %w", err) + } + + // 'encode' major/minor in a single integer + legacyVersion := 1024 // Kubernetes 1.24 => 1 * 1000 + 24 + if ((major * 1000) + minor) < (legacyVersion) { + // path in Kubernetes < 1.24 + targetPath = filepath.Join(stagingPath, "pv", pv.Name, "globalmount") + } + + return targetPath, nil +} + func callNodeStageVolume(ns *NodeServer, c *k8s.Clientset, pv *v1.PersistentVolume, stagingPath string) error { publishContext := make(map[string]string) volID := pv.Spec.PersistentVolumeSource.CSI.VolumeHandle - stagingParentPath := stagingPath + pv.Name + "/globalmount" + stagingParentPath, err := formatStagingTargetPath(c, pv, stagingPath) + if err != nil { + log.ErrorLogMsg("formatStagingTargetPath failed volID: %s, err: %v", volID, err) + + return err + } log.DefaultLog("sending nodeStageVolume for volID: %s, stagingPath: %s", volID, stagingParentPath) diff --git a/internal/rbd/replicationcontrollerserver.go b/internal/rbd/replicationcontrollerserver.go index f84b0c7a1..117885345 100644 --- a/internal/rbd/replicationcontrollerserver.go +++ b/internal/rbd/replicationcontrollerserver.go @@ -581,7 +581,12 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context, } } - err = checkHealthyPrimary(ctx, rbdVol) + mirrorStatus, err := rbdVol.getImageMirroringStatus() + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + err = checkHealthyPrimary(ctx, rbdVol, mirrorStatus) if err != nil { return nil, status.Error(codes.Internal, err.Error()) } @@ -616,14 +621,9 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context, } // checkHealthyPrimary checks if the image is a healhty primary or not. -// healthy primary image will be in up+stopped state in local cluster and -// up+replaying in the remote clusters, for states other than this it returns -// an error message. -func checkHealthyPrimary(ctx context.Context, rbdVol *rbdVolume) error { - mirrorStatus, err := rbdVol.getImageMirroringStatus() - if err != nil { - return err - } +// healthy primary image will be in up+stopped state, for states other +// than this it returns an error message. +func checkHealthyPrimary(ctx context.Context, rbdVol *rbdVolume, mirrorStatus *librbd.GlobalMirrorImageStatus) error { localStatus, err := mirrorStatus.LocalStatus() if err != nil { // LocalStatus can fail if the local site status is not found in @@ -634,7 +634,7 @@ func checkHealthyPrimary(ctx context.Context, rbdVol *rbdVolume) error { return fmt.Errorf("failed to get local status: %w", err) } - if !localStatus.Up && localStatus.State != librbd.MirrorImageStatusStateStopped { + if !localStatus.Up || localStatus.State != librbd.MirrorImageStatusStateStopped { return fmt.Errorf("%s %w. State is up=%t, state=%q", rbdVol, ErrUnHealthyMirroredImage, @@ -642,26 +642,6 @@ func checkHealthyPrimary(ctx context.Context, rbdVol *rbdVolume) error { localStatus.State) } - // Remote image should be in up+replaying state. - for _, s := range mirrorStatus.SiteStatuses { - log.UsefulLog( - ctx, - "peer site mirrorUUID=%q, daemon up=%t, mirroring state=%q, description=%q and lastUpdate=%d", - s.MirrorUUID, - s.Up, - s.State, - s.Description, - s.LastUpdate) - if s.MirrorUUID != "" { - if !s.Up && s.State != librbd.MirrorImageStatusStateReplaying { - return fmt.Errorf("remote image %s is not healthy. State is up=%t, state=%q", - rbdVol, - s.Up, - s.State) - } - } - } - return nil } @@ -856,18 +836,11 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, ready = checkRemoteSiteStatus(ctx, mirrorStatus) } - if resyncRequired(localStatus) { - err = rbdVol.resyncImage() - if err != nil { - log.ErrorLog(ctx, err.Error()) + err = resyncVolume(localStatus, rbdVol, req.Force) + if err != nil { + log.ErrorLog(ctx, err.Error()) - return nil, status.Error(codes.Internal, err.Error()) - } - - // If we issued a resync, return a non-final error as image needs to be recreated - // locally. Caller retries till RBD syncs an initial version of the image to - // report its status in the resync request. - return nil, status.Error(codes.Unavailable, "awaiting initial resync due to split brain") + return nil, err } err = checkVolumeResyncStatus(localStatus) @@ -887,6 +860,29 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, return resp, nil } +func resyncVolume(localStatus librbd.SiteMirrorImageStatus, rbdVol *rbdVolume, force bool) error { + if resyncRequired(localStatus) { + // If the force option is not set return the error message to retry + // with Force option. + if !force { + return status.Errorf(codes.FailedPrecondition, + "image is in %q state, description (%s). Force resync to recover volume", + localStatus.State, localStatus.Description) + } + err := rbdVol.resyncImage() + if err != nil { + return status.Error(codes.Internal, err.Error()) + } + + // If we issued a resync, return a non-final error as image needs to be recreated + // locally. Caller retries till RBD syncs an initial version of the image to + // report its status in the resync request. + return status.Error(codes.Unavailable, "awaiting initial resync due to split brain") + } + + return nil +} + func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error { // we are considering 2 states to check resync started and resync completed // as below. all other states will be considered as an error state so that diff --git a/internal/rbd/replicationcontrollerserver_test.go b/internal/rbd/replicationcontrollerserver_test.go index 401292587..87f60a38f 100644 --- a/internal/rbd/replicationcontrollerserver_test.go +++ b/internal/rbd/replicationcontrollerserver_test.go @@ -279,3 +279,75 @@ func TestCheckVolumeResyncStatus(t *testing.T) { }) } } + +func Test_checkHealthyPrimary(t *testing.T) { + t.Parallel() + rbdVol := &rbdVolume{ + rbdImage: rbdImage{ + RbdImageName: "test", + Pool: "test-pool", + }, + } + tests := []struct { + name string + ctx context.Context + rbdVol *rbdVolume + mirrorStatus *librbd.GlobalMirrorImageStatus + wantErr bool + }{ + { + name: "when image is in up+stopped state", + ctx: context.TODO(), + rbdVol: rbdVol, + mirrorStatus: &librbd.GlobalMirrorImageStatus{ + SiteStatuses: []librbd.SiteMirrorImageStatus{ + { + MirrorUUID: "", + State: librbd.MirrorImageStatusStateStopped, + Up: true, + }, + }, + }, + wantErr: false, + }, + { + name: "when image is in up+error state", + ctx: context.TODO(), + rbdVol: rbdVol, + mirrorStatus: &librbd.GlobalMirrorImageStatus{ + SiteStatuses: []librbd.SiteMirrorImageStatus{ + { + MirrorUUID: "", + State: librbd.MirrorImageStatusStateError, + Up: false, + }, + }, + }, + wantErr: true, + }, + { + name: "when image is in up+replaying state", + ctx: context.TODO(), + rbdVol: rbdVol, + mirrorStatus: &librbd.GlobalMirrorImageStatus{ + SiteStatuses: []librbd.SiteMirrorImageStatus{ + { + MirrorUUID: "", + State: librbd.MirrorImageStatusStateReplaying, + Up: true, + }, + }, + }, + wantErr: true, + }, + } + for _, tt := range tests { + ts := tt + t.Run(ts.name, func(t *testing.T) { + t.Parallel() + if err := checkHealthyPrimary(ts.ctx, ts.rbdVol, ts.mirrorStatus); (err != nil) != ts.wantErr { + t.Errorf("checkHealthyPrimary() error = %v, wantErr %v", err, ts.wantErr) + } + }) + } +} diff --git a/vendor/modules.txt b/vendor/modules.txt index d935b3a9b..292b1baea 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -352,7 +352,7 @@ github.com/kubernetes-csi/csi-lib-utils/connection github.com/kubernetes-csi/csi-lib-utils/metrics github.com/kubernetes-csi/csi-lib-utils/protosanitizer github.com/kubernetes-csi/csi-lib-utils/rpc -# github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.0 +# github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.1 ## explicit; go 1.17 github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1 github.com/kubernetes-csi/external-snapshotter/client/v6/clientset/versioned/scheme