From e4b7943bacde6ce5af593b7e97b1902eaa32956c Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Wed, 22 Dec 2021 09:06:37 +0530 Subject: [PATCH] rbd: add workaround for force promote Use ExecCommandWithTimeout with a timeout of 1 minute for the promote operation. If the command does not return an error or a response within 1 minute, the process will be killed and an error will be returned to the user. Signed-off-by: Madhu Rajanna --- internal/rbd/mirror.go | 33 +++++++++++++++++++++ internal/rbd/replicationcontrollerserver.go | 8 ++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/internal/rbd/mirror.go b/internal/rbd/mirror.go index 0d010be01..504268a12 100644 --- a/internal/rbd/mirror.go +++ b/internal/rbd/mirror.go @@ -16,7 +16,11 @@ limitations under the License. package rbd import ( + "context" "fmt" + "time" + + "github.com/ceph/ceph-csi/internal/util" librbd "github.com/ceph/go-ceph/rbd" ) @@ -84,6 +88,35 @@ func (ri *rbdImage) promoteImage(force bool) error { return nil } +// forcePromoteImage promotes image to primary with force option with 1 minute +// timeout. If there is no response within 1 minute, the rbd CLI process will be +// killed and an error is returned. +func (rv *rbdVolume) forcePromoteImage(cr *util.Credentials) error { + promoteArgs := []string{ + "mirror", "image", "promote", + rv.String(), + "--force", + "--id", cr.ID, + "-m", rv.Monitors, + "--keyfile=" + cr.KeyFile, + } + _, stderr, err := util.ExecCommandWithTimeout( + context.TODO(), + time.Minute, + "rbd", + promoteArgs..., + ) + if err != nil { + return fmt.Errorf("failed to promote image %q with error: %w", rv, err) + } + + if stderr != "" { + return fmt.Errorf("failed to promote image %q with stderror: %s", rv, stderr) + } + + return nil +} + // demoteImage demotes image to secondary. 
func (ri *rbdImage) demoteImage() error { image, err := ri.open() diff --git a/internal/rbd/replicationcontrollerserver.go b/internal/rbd/replicationcontrollerserver.go index 51c0ab6ac..521b4c951 100644 --- a/internal/rbd/replicationcontrollerserver.go +++ b/internal/rbd/replicationcontrollerserver.go @@ -557,7 +557,13 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context, // promote secondary to primary if !mirroringInfo.Primary { - err = rbdVol.promoteImage(req.Force) + if req.GetForce() { + // workaround for https://github.com/ceph/ceph-csi/issues/2736 + // TODO: remove this workaround when the issue is fixed + err = rbdVol.forcePromoteImage(cr) + } else { + err = rbdVol.promoteImage(req.GetForce()) + } if err != nil { log.ErrorLog(ctx, err.Error()) // In case of the DR the image on the primary site cannot be