rbd: check local image state for resyncing

Below are the local states of the mirrored image:

"unknown" -> If the image is in an unknown state
means data is completely synced
"error" -> If the image is in an error state
means it needs resync
"syncing"
"starting_replay"
"replaying"
"stopping_replay"
"stopped"

If the resync is successfully started, the image
will be in the "replaying" state. We can therefore
treat the "replaying" state as the indication that
the resync is successfully in progress.

we are discarding the intermediate states like
"syncing", "starting_replay" and "stopping_replay".

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
(cherry picked from commit 1fd2f28fee)
This commit is contained in:
Madhu Rajanna 2021-10-25 15:54:12 +05:30 committed by mergify[bot]
parent 7d163dab64
commit c41bf37b95
2 changed files with 107 additions and 8 deletions

View File

@ -633,6 +633,14 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
if state == unknown && localStatus.Up { if state == unknown && localStatus.Up {
ready = checkRemoteSiteStatus(ctx, mirrorStatus) ready = checkRemoteSiteStatus(ctx, mirrorStatus)
} }
// convert the last update time to UTC
lastUpdateTime := time.Unix(localStatus.LastUpdate, 0).UTC()
util.UsefulLog(
ctx,
"image mirroring state=%s, description=%s and lastUpdate=%s",
localStatus.State.String(),
localStatus.Description,
lastUpdateTime)
if resyncRequired(localStatus) { if resyncRequired(localStatus) {
err = rbdVol.resyncImage() err = rbdVol.resyncImage()
@ -648,14 +656,10 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
return nil, status.Error(codes.Unavailable, "awaiting initial resync due to split brain") return nil, status.Error(codes.Unavailable, "awaiting initial resync due to split brain")
} }
// convert the last update time to UTC err = checkVolumeResyncStatus(localStatus)
lastUpdateTime := time.Unix(localStatus.LastUpdate, 0).UTC() if err != nil {
util.UsefulLog( return nil, status.Error(codes.Internal, err.Error())
ctx, }
"image mirroring state=%s, description=%s and lastUpdate=%s",
localStatus.State.String(),
localStatus.Description,
lastUpdateTime)
resp := &replication.ResyncVolumeResponse{ resp := &replication.ResyncVolumeResponse{
Ready: ready, Ready: ready,
@ -664,6 +668,25 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
return resp, nil return resp, nil
} }
// checkVolumeResyncStatus inspects the state of the local mirror image and
// returns an error unless that state is one of the two accepted for a resync.
//
// Only two states are accepted:
//   - Replaying: the resync is going on.
//   - Unknown: once the volume on the remote cluster is demoted and the
//     resync is completed, the image state is moved to UNKNOWN.
//
// Every other state is reported as an error so that cephCSI can return an
// error message and the volume replication operator can mark the
// VolumeReplication status as not resyncing for the volume.
func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error {
	switch localStatus.State {
	case librbd.MirrorImageStatusStateReplaying,
		librbd.MirrorImageStatusStateUnknown:
		// resync in progress or already completed
		return nil
	default:
		return fmt.Errorf("not resyncing. image is in %q state", localStatus.State)
	}
}
// resyncRequired returns true if local image is in split-brain state and image // resyncRequired returns true if local image is in split-brain state and image
// needs resync. // needs resync.
func resyncRequired(localStatus librbd.SiteMirrorImageStatus) bool { func resyncRequired(localStatus librbd.SiteMirrorImageStatus) bool {

View File

@ -20,6 +20,7 @@ import (
"reflect" "reflect"
"testing" "testing"
librbd "github.com/ceph/go-ceph/rbd"
"github.com/ceph/go-ceph/rbd/admin" "github.com/ceph/go-ceph/rbd/admin"
) )
@ -175,3 +176,78 @@ func TestGetSchedulingDetails(t *testing.T) {
}) })
} }
} }
// TestCheckVolumeResyncStatus verifies that checkVolumeResyncStatus accepts
// only the "unknown" and "replaying" mirror image states and returns an error
// for every other state, including an invalid state value (100).
func TestCheckVolumeResyncStatus(t *testing.T) {
	t.Parallel()

	type resyncCase struct {
		name    string
		state   librbd.MirrorImageStatusState
		wantErr bool
	}

	cases := []resyncCase{
		{
			name:    "test for unknown state",
			state:   librbd.MirrorImageStatusStateUnknown,
			wantErr: false,
		},
		{
			name:    "test for error state",
			state:   librbd.MirrorImageStatusStateError,
			wantErr: true,
		},
		{
			name:    "test for syncing state",
			state:   librbd.MirrorImageStatusStateSyncing,
			wantErr: true,
		},
		{
			name:    "test for starting_replay state",
			state:   librbd.MirrorImageStatusStateStartingReplay,
			wantErr: true,
		},
		{
			name:    "test for replaying state",
			state:   librbd.MirrorImageStatusStateReplaying,
			wantErr: false,
		},
		{
			name:    "test for stopping_replay state",
			state:   librbd.MirrorImageStatusStateStoppingReplay,
			wantErr: true,
		},
		{
			name:    "test for stopped state",
			state:   librbd.MirrorImageStatusStateStopped,
			wantErr: true,
		},
		{
			name:    "test for invalid state",
			state:   librbd.MirrorImageStatusState(100),
			wantErr: true,
		},
	}

	for i := range cases {
		// take a per-iteration copy so the parallel subtest closure does not
		// share the loop variable
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			err := checkVolumeResyncStatus(librbd.SiteMirrorImageStatus{State: tc.state})
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("checkVolumeResyncStatus() error = %v, expect error = %v", err, tc.wantErr)
			}
		})
	}
}