rbd: add workaround for force promote

Use ExecCommandWithTimeout with a timeout
of 1 minute for the promote operation.
If the command does not return an error or a response
within 1 minute, the process will be killed
and an error will be returned to the user.

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
This commit is contained in:
Madhu Rajanna 2021-12-22 09:06:37 +05:30 committed by mergify[bot]
parent 95e9595c1f
commit e4b7943bac
2 changed files with 40 additions and 1 deletions

View File

@ -16,7 +16,11 @@ limitations under the License.
package rbd package rbd
import ( import (
"context"
"fmt" "fmt"
"time"
"github.com/ceph/ceph-csi/internal/util"
librbd "github.com/ceph/go-ceph/rbd" librbd "github.com/ceph/go-ceph/rbd"
) )
@ -84,6 +88,35 @@ func (ri *rbdImage) promoteImage(force bool) error {
return nil return nil
} }
// forcePromoteImage force-promotes the image to primary by running
// "rbd mirror image promote ... --force" through util.ExecCommandWithTimeout
// with a timeout of one minute. The intent is that the rbd CLI process does
// not block forever: if it has not finished within the timeout it is expected
// to be killed by the helper and an error is returned.
//
// The credentials in cr provide the user ID and keyfile passed to the CLI.
// An error is returned when the command itself fails, or when it succeeds
// but wrote anything to stderr.
func (rv *rbdVolume) forcePromoteImage(cr *util.Credentials) error {
	// Upper bound on how long the rbd CLI invocation may run.
	const promoteTimeout = time.Minute

	args := []string{"mirror", "image", "promote", rv.String(), "--force"}
	args = append(args, "--id", cr.ID)
	args = append(args, "-m", rv.Monitors)
	args = append(args, "--keyfile="+cr.KeyFile)

	// stdout is not needed; only the error and stderr decide the outcome.
	_, errOutput, execErr := util.ExecCommandWithTimeout(context.TODO(), promoteTimeout, "rbd", args...)

	switch {
	case execErr != nil:
		return fmt.Errorf("failed to promote image %q with error: %w", rv, execErr)
	case errOutput != "":
		// Any stderr output is treated as a failure, even without an error.
		return fmt.Errorf("failed to promote image %q with stderror: %s", rv, errOutput)
	}

	return nil
}
// demoteImage demotes image to secondary. // demoteImage demotes image to secondary.
func (ri *rbdImage) demoteImage() error { func (ri *rbdImage) demoteImage() error {
image, err := ri.open() image, err := ri.open()

View File

@ -557,7 +557,13 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context,
// promote secondary to primary // promote secondary to primary
if !mirroringInfo.Primary { if !mirroringInfo.Primary {
err = rbdVol.promoteImage(req.Force) if req.GetForce() {
// workaround for https://github.com/ceph/ceph-csi/issues/2736
// TODO: remove this workaround when the issue is fixed
err = rbdVol.forcePromoteImage(cr)
} else {
err = rbdVol.promoteImage(req.GetForce())
}
if err != nil { if err != nil {
log.ErrorLog(ctx, err.Error()) log.ErrorLog(ctx, err.Error())
// In case of the DR the image on the primary site cannot be // In case of the DR the image on the primary site cannot be