mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-01-07 12:29:31 +00:00
rbd: issue resync only if the force flag is set
During failover we do demote the volume on the primary
as the image is still not promoted yet on the remote cluster,
there are spurious split-brain errors reported by RBD,
the Cephcsi resync will attempt to resync from the "known"
secondary and that will cause data loss
Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
(cherry picked from commit 3acaa018db
)
This commit is contained in:
parent
e7066798bb
commit
471c1342b1
@ -855,18 +855,11 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
|
||||
ready = checkRemoteSiteStatus(ctx, mirrorStatus)
|
||||
}
|
||||
|
||||
if resyncRequired(localStatus) {
|
||||
err = rbdVol.resyncImage()
|
||||
if err != nil {
|
||||
log.ErrorLog(ctx, err.Error())
|
||||
err = resyncVolume(localStatus, rbdVol, req.Force)
|
||||
if err != nil {
|
||||
log.ErrorLog(ctx, err.Error())
|
||||
|
||||
return nil, status.Error(codes.Internal, err.Error())
|
||||
}
|
||||
|
||||
// If we issued a resync, return a non-final error as image needs to be recreated
|
||||
// locally. Caller retries till RBD syncs an initial version of the image to
|
||||
// report its status in the resync request.
|
||||
return nil, status.Error(codes.Unavailable, "awaiting initial resync due to split brain")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = checkVolumeResyncStatus(localStatus)
|
||||
@ -886,6 +879,29 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func resyncVolume(localStatus librbd.SiteMirrorImageStatus, rbdVol *rbdVolume, force bool) error {
|
||||
if resyncRequired(localStatus) {
|
||||
// If the force option is not set return the error message to retry
|
||||
// with Force option.
|
||||
if !force {
|
||||
return status.Errorf(codes.FailedPrecondition,
|
||||
"image is in %q state, description (%s). Force resync to recover volume",
|
||||
localStatus.State, localStatus.Description)
|
||||
}
|
||||
err := rbdVol.resyncImage()
|
||||
if err != nil {
|
||||
return status.Error(codes.Internal, err.Error())
|
||||
}
|
||||
|
||||
// If we issued a resync, return a non-final error as image needs to be recreated
|
||||
// locally. Caller retries till RBD syncs an initial version of the image to
|
||||
// report its status in the resync request.
|
||||
return status.Error(codes.Unavailable, "awaiting initial resync due to split brain")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error {
|
||||
// we are considering 2 states to check resync started and resync completed
|
||||
// as below. all other states will be considered as an error state so that
|
||||
|
Loading…
Reference in New Issue
Block a user