rbd: issue resync only if the force flag is set

During failover we do demote the volume on the primary
as the image is still not promoted yet on the remote cluster,
there are spurious split-brain errors reported by RBD,
the Cephcsi resync will attempt to resync from the "known"
secondary and that will cause data loss

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
(cherry picked from commit 3acaa018db)
This commit is contained in:
Madhu Rajanna 2022-06-22 11:18:10 +05:30 committed by mergify[bot]
parent e7066798bb
commit 471c1342b1

View File

@ -855,18 +855,11 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
ready = checkRemoteSiteStatus(ctx, mirrorStatus) ready = checkRemoteSiteStatus(ctx, mirrorStatus)
} }
if resyncRequired(localStatus) { err = resyncVolume(localStatus, rbdVol, req.Force)
err = rbdVol.resyncImage() if err != nil {
if err != nil { log.ErrorLog(ctx, err.Error())
log.ErrorLog(ctx, err.Error())
return nil, status.Error(codes.Internal, err.Error()) return nil, err
}
// If we issued a resync, return a non-final error as image needs to be recreated
// locally. Caller retries till RBD syncs an initial version of the image to
// report its status in the resync request.
return nil, status.Error(codes.Unavailable, "awaiting initial resync due to split brain")
} }
err = checkVolumeResyncStatus(localStatus) err = checkVolumeResyncStatus(localStatus)
@ -886,6 +879,29 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
return resp, nil return resp, nil
} }
func resyncVolume(localStatus librbd.SiteMirrorImageStatus, rbdVol *rbdVolume, force bool) error {
if resyncRequired(localStatus) {
// If the force option is not set return the error message to retry
// with Force option.
if !force {
return status.Errorf(codes.FailedPrecondition,
"image is in %q state, description (%s). Force resync to recover volume",
localStatus.State, localStatus.Description)
}
err := rbdVol.resyncImage()
if err != nil {
return status.Error(codes.Internal, err.Error())
}
// If we issued a resync, return a non-final error as image needs to be recreated
// locally. Caller retries till RBD syncs an initial version of the image to
// report its status in the resync request.
return status.Error(codes.Unavailable, "awaiting initial resync due to split brain")
}
return nil
}
func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error { func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error {
// we are considering 2 states to check resync started and resync completed // we are considering 2 states to check resync started and resync completed
// as below. all other states will be considered as an error state so that // as below. all other states will be considered as an error state so that