diff --git a/e2e/cephfs.go b/e2e/cephfs.go
index afb3a516f..b59827b9d 100644
--- a/e2e/cephfs.go
+++ b/e2e/cephfs.go
@@ -280,6 +280,7 @@ var _ = Describe("cephfs", func() {
 		It("Test CephFS CSI", func() {
 			pvcPath := cephFSExamplePath + "pvc.yaml"
 			appPath := cephFSExamplePath + "pod.yaml"
+			deplPath := cephFSExamplePath + "deployment.yaml"
 			appRWOPPath := cephFSExamplePath + "pod-rwop.yaml"
 			pvcClonePath := cephFSExamplePath + "pvc-restore.yaml"
 			pvcSmartClonePath := cephFSExamplePath + "pvc-clone.yaml"
@@ -504,6 +505,139 @@ var _ = Describe("cephfs", func() {
 				}
 			})
 
+			By("verifying that ceph-fuse recovery works for new pods", func() {
+				err := deleteResource(cephFSExamplePath + "storageclass.yaml")
+				if err != nil {
+					e2elog.Failf("failed to delete CephFS storageclass: %v", err)
+				}
+				err = createCephfsStorageClass(f.ClientSet, f, true, map[string]string{
+					"mounter": "fuse",
+				})
+				if err != nil {
+					e2elog.Failf("failed to create CephFS storageclass: %v", err)
+				}
+				replicas := int32(2)
+				pvc, depl, err := validatePVCAndDeploymentAppBinding(
+					f, pvcPath, deplPath, f.UniqueName, &replicas, deployTimeout,
+				)
+				if err != nil {
+					e2elog.Failf("failed to create PVC and Deployment: %v", err)
+				}
+				deplPods, err := listPods(f, depl.Namespace, &metav1.ListOptions{
+					LabelSelector: fmt.Sprintf("app=%s", depl.Labels["app"]),
+				})
+				if err != nil {
+					e2elog.Failf("failed to list pods for Deployment: %v", err)
+				}
+				if len(deplPods) < int(replicas) {
+					e2elog.Failf("expected %d Deployment pods, found %d", replicas, len(deplPods))
+				}
+
+				doStat := func(podName string) (stdErr string, err error) {
+					_, stdErr, err = execCommandInContainerByPodName(
+						f,
+						fmt.Sprintf("stat %s", depl.Spec.Template.Spec.Containers[0].VolumeMounts[0].MountPath),
+						depl.Namespace,
+						podName,
+						depl.Spec.Template.Spec.Containers[0].Name,
+					)
+
+					return stdErr, err
+				}
+				ensureStatSucceeds := func(podName string) error {
+					stdErr, statErr := doStat(podName)
+					if statErr != nil || stdErr != "" {
+						return fmt.Errorf(
+							"expected stat to succeed without error output ; got err %v, stderr %s",
+							statErr, stdErr,
+						)
+					}
+
+					return nil
+				}
+
+				pod1Name, pod2Name := deplPods[0].Name, deplPods[1].Name
+
+				// stat() ceph-fuse mountpoints to make sure they are working.
+				for i := range deplPods {
+					err = ensureStatSucceeds(deplPods[i].Name)
+					if err != nil {
+						e2elog.Failf("ceph-fuse mountpoint check failed in pod %s: %v", deplPods[i].Name, err)
+					}
+				}
+				// Kill ceph-fuse in cephfs-csi node plugin Pods.
+				nodePluginSelector, err := getDaemonSetLabelSelector(f, cephCSINamespace, cephFSDeamonSetName)
+				if err != nil {
+					e2elog.Failf("failed to get node plugin DaemonSet label selector: %v", err)
+				}
+				_, stdErr, err := execCommandInContainer(
+					f, "killall -9 ceph-fuse", cephCSINamespace, "csi-cephfsplugin", &metav1.ListOptions{
+						LabelSelector: nodePluginSelector,
+					},
+				)
+				if err != nil {
+					e2elog.Failf("killall command failed: err %v, stderr %s", err, stdErr)
+				}
+				// Verify that stat()-ing the mountpoint in Pod pod2Name results in ENOTCONN.
+				stdErr, err = doStat(pod2Name)
+				if err == nil || !strings.Contains(stdErr, "not connected") {
+					e2elog.Failf(
+						"expected stat to fail with 'Transport endpoint not connected' or 'Socket not connected'; got err %v, stderr %s",
+						err, stdErr,
+					)
+				}
+				// Delete Pod pod2Name. This serves two purposes: it verifies that deleting pods with
+				// corrupted ceph-fuse mountpoints works, and it lets the replicaset controller recreate
+				// the pod with hopefully mounts working again.
+				err = deletePod(pod2Name, depl.Namespace, c, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to delete pod %s: %v", pod2Name, err)
+				}
+				// Wait for the second Pod to be recreated.
+				err = waitForDeploymentComplete(c, depl.Name, depl.Namespace, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to wait for Deployment %s to recover: %v", depl.Name, err)
+				}
+				// List Deployment's pods again to get name of the new pod.
+				deplPods, err = listPods(f, depl.Namespace, &metav1.ListOptions{
+					LabelSelector: fmt.Sprintf("app=%s", depl.Labels["app"]),
+				})
+				if err != nil {
+					e2elog.Failf("failed to list pods for Deployment: %v", err)
+				}
+				// Forget the deleted pod's name, so that a missing replacement is detected below.
+				pod2Name = ""
+				for i := range deplPods {
+					if deplPods[i].Name != pod1Name {
+						pod2Name = deplPods[i].Name
+
+						break
+					}
+				}
+				if pod2Name == "" {
+					podNames := make([]string, len(deplPods))
+					for i := range deplPods {
+						podNames[i] = deplPods[i].Name
+					}
+					e2elog.Failf("no new replica found ; found replicas %v", podNames)
+				}
+				// Verify that the recreated Pod has its ceph-fuse mount working again.
+				err = ensureStatSucceeds(pod2Name)
+				if err != nil {
+					e2elog.Failf("ceph-fuse mountpoint check failed in pod %s: %v", pod2Name, err)
+				}
+
+				// Delete created resources.
+				err = deletePVCAndDeploymentApp(f, pvc, depl)
+				if err != nil {
+					e2elog.Failf("failed to delete PVC and Deployment: %v", err)
+				}
+				err = deleteResource(cephFSExamplePath + "storageclass.yaml")
+				if err != nil {
+					e2elog.Failf("failed to delete CephFS storageclass: %v", err)
+				}
+			})
+
 			By("create a PVC and bind it to an app", func() {
 				err := createCephfsStorageClass(f.ClientSet, f, false, nil)
 				if err != nil {
diff --git a/e2e/pod.go b/e2e/pod.go
index 799646636..394b92a18 100644
--- a/e2e/pod.go
+++ b/e2e/pod.go
@@ -214,6 +214,31 @@ func execCommandInContainer(
 	return stdOut, stdErr, err
 }
 
+// execCommandInContainerByPodName runs shellCmd through "/bin/sh -c" in the given container
+// of the named pod, logs any stderr output, and returns the captured stdout, stderr and error.
+func execCommandInContainerByPodName(
+	f *framework.Framework, shellCmd, namespace, podName, containerName string,
+) (string, string, error) {
+	cmd := []string{"/bin/sh", "-c", shellCmd}
+	execOpts := framework.ExecOptions{
+		Command:            cmd,
+		PodName:            podName,
+		Namespace:          namespace,
+		ContainerName:      containerName,
+		Stdin:              nil,
+		CaptureStdout:      true,
+		CaptureStderr:      true,
+		PreserveWhitespace: true,
+	}
+
+	stdOut, stdErr, err := f.ExecWithOptions(execOpts)
+	if stdErr != "" {
+		e2elog.Logf("stdErr occurred: %v", stdErr)
+	}
+
+	return stdOut, stdErr, err
+}
+
 func execCommandInToolBoxPod(f *framework.Framework, c, ns string) (string, string, error) {
 	opt := &metav1.ListOptions{
 		LabelSelector: rookToolBoxPodLabel,
diff --git a/e2e/rbd.go b/e2e/rbd.go
index fa9020025..ebd15b1fc 100644
--- a/e2e/rbd.go
+++ b/e2e/rbd.go
@@ -984,7 +984,7 @@ var _ = Describe("RBD", func() {
 				}
 				app.Namespace = f.UniqueName
 
-				err = createPVCAndDeploymentApp(f, "", pvc, app, deployTimeout)
+				err = createPVCAndDeploymentApp(f, pvc, app, deployTimeout)
 				if err != nil {
 					e2elog.Failf("failed to create PVC and application: %v", err)
 				}
@@ -1014,7 +1014,7 @@ var _ = Describe("RBD", func() {
 					}
 				}
 
-				err = deletePVCAndDeploymentApp(f, "", pvc, app)
+				err = deletePVCAndDeploymentApp(f, pvc, app)
 				if err != nil {
 					e2elog.Failf("failed to delete PVC and application: %v", err)
 				}
@@ -1093,7 +1093,7 @@ var _ = Describe("RBD", func() {
 				appClone.Namespace = f.UniqueName
 				appClone.Spec.Template.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = pvcClone.Name
 				appClone.Spec.Template.Spec.Volumes[0].PersistentVolumeClaim.ReadOnly = true
-				err = createPVCAndDeploymentApp(f, "", pvcClone, appClone, deployTimeout)
+				err = createPVCAndDeploymentApp(f, pvcClone, appClone, deployTimeout)
 				if err != nil {
 					e2elog.Failf("failed to create PVC and application: %v", err)
 				}
@@ -1131,7 +1131,7 @@ var _ = Describe("RBD", func() {
 					}
 				}
 
-				err = deletePVCAndDeploymentApp(f, "", pvcClone, appClone)
+				err = deletePVCAndDeploymentApp(f, pvcClone, appClone)
 				if err != nil {
 					e2elog.Failf("failed to delete PVC and application: %v", err)
 				}
@@ -1217,7 +1217,7 @@ var _ = Describe("RBD", func() {
 				appClone.Namespace = f.UniqueName
 				appClone.Spec.Template.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = pvcClone.Name
 				appClone.Spec.Template.Spec.Volumes[0].PersistentVolumeClaim.ReadOnly = true
-				err = createPVCAndDeploymentApp(f, "", pvcClone, appClone, deployTimeout)
+				err = createPVCAndDeploymentApp(f, pvcClone, appClone, deployTimeout)
 				if err != nil {
 					e2elog.Failf("failed to create PVC and application: %v", err)
 				}
@@ -1254,7 +1254,7 @@ var _ = Describe("RBD", func() {
 						e2elog.Failf(stdErr)
 					}
 				}
-				err = deletePVCAndDeploymentApp(f, "", pvcClone, appClone)
+				err = deletePVCAndDeploymentApp(f, pvcClone, appClone)
 				if err != nil {
 					e2elog.Failf("failed to delete PVC and application: %v", err)
 				}
diff --git a/e2e/utils.go b/e2e/utils.go
index 2882fea95..8bc873d5f 100644
--- a/e2e/utils.go
+++ b/e2e/utils.go
@@ -191,19 +191,12 @@ func createPVCAndApp(
 	return err
 }
 
-// createPVCAndDeploymentApp creates pvc and deployment, if name is not empty
-// same will be set as pvc and app name.
+// createPVCAndDeploymentApp creates pvc and deployment.
 func createPVCAndDeploymentApp(
 	f *framework.Framework,
-	name string,
 	pvc *v1.PersistentVolumeClaim,
 	app *appsv1.Deployment,
 	pvcTimeout int) error {
-	if name != "" {
-		pvc.Name = name
-		app.Name = name
-		app.Spec.Template.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = name
-	}
 	err := createPVCAndvalidatePV(f.ClientSet, pvc, pvcTimeout)
 	if err != nil {
 		return err
@@ -213,19 +206,50 @@ func createPVCAndDeploymentApp(
 	return err
 }
 
-// DeletePVCAndDeploymentApp deletes pvc and deployment, if name is not empty
-// same will be set as pvc and app name.
-func deletePVCAndDeploymentApp(
+// validatePVCAndDeploymentAppBinding creates PVC and Deployment in `namespace`, and waits
+// until all Deployment replicas are Running. Use `replicas` to override default number of
+// replicas defined in `deploymentPath` Deployment manifest.
+func validatePVCAndDeploymentAppBinding(
 	f *framework.Framework,
-	name string,
-	pvc *v1.PersistentVolumeClaim,
-	app *appsv1.Deployment) error {
-	if name != "" {
-		pvc.Name = name
-		app.Name = name
-		app.Spec.Template.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = name
+	pvcPath string,
+	deploymentPath string,
+	namespace string,
+	replicas *int32,
+	pvcTimeout int,
+) (*v1.PersistentVolumeClaim, *appsv1.Deployment, error) {
+	pvc, err := loadPVC(pvcPath)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to load PVC: %w", err)
+	}
+	pvc.Namespace = namespace
+
+	depl, err := loadAppDeployment(deploymentPath)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to load Deployment: %w", err)
+	}
+	depl.Namespace = namespace
+	if replicas != nil {
+		depl.Spec.Replicas = replicas
 	}
 
+	err = createPVCAndDeploymentApp(f, pvc, depl, pvcTimeout)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	err = waitForDeploymentComplete(f.ClientSet, depl.Name, depl.Namespace, deployTimeout)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return pvc, depl, nil
+}
+
+// deletePVCAndDeploymentApp deletes pvc and deployment.
+func deletePVCAndDeploymentApp(
+	f *framework.Framework,
+	pvc *v1.PersistentVolumeClaim,
+	app *appsv1.Deployment) error {
 	err := deleteDeploymentApp(f.ClientSet, app.Name, app.Namespace, deployTimeout)
 	if err != nil {
 		return err