From c41bf37b95a9ec7b15fc0a12d113f75738229686 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Mon, 25 Oct 2021 15:54:12 +0530 Subject: [PATCH] rbd: check local image state for resyncing below are the local states of the mirrored image "unknown" -> If the image is in an error state means data is completely synced "error" -> If the image is in an error state means it needs resync "syncing" "starting_replay" "replaying" "stopping_replay" "stopped" If the resync is successfully started which means the image will be in "replaying" state. we can consider "replaying" state to report resync succesfully going on state. we are discarding the intermediate states like "syncing", "starting_replay" and "stopping_replay". Signed-off-by: Madhu Rajanna (cherry picked from commit 1fd2f28fee704d6b6200ddd8e2670a934c97e18b) --- internal/rbd/replicationcontrollerserver.go | 39 ++++++++-- .../rbd/replicationcontrollerserver_test.go | 76 +++++++++++++++++++ 2 files changed, 107 insertions(+), 8 deletions(-) diff --git a/internal/rbd/replicationcontrollerserver.go b/internal/rbd/replicationcontrollerserver.go index ec7ef7000..1ad01aa50 100644 --- a/internal/rbd/replicationcontrollerserver.go +++ b/internal/rbd/replicationcontrollerserver.go @@ -633,6 +633,14 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, if state == unknown && localStatus.Up { ready = checkRemoteSiteStatus(ctx, mirrorStatus) } + // convert the last update time to UTC + lastUpdateTime := time.Unix(localStatus.LastUpdate, 0).UTC() + util.UsefulLog( + ctx, + "image mirroring state=%s, description=%s and lastUpdate=%s", + localStatus.State.String(), + localStatus.Description, + lastUpdateTime) if resyncRequired(localStatus) { err = rbdVol.resyncImage() @@ -648,14 +656,10 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, return nil, status.Error(codes.Unavailable, "awaiting initial resync due to split brain") } - // convert the last update time to UTC - lastUpdateTime := time.Unix(localStatus.LastUpdate, 0).UTC() - util.UsefulLog( - ctx, - "image mirroring state=%s, description=%s and lastUpdate=%s", - localStatus.State.String(), - localStatus.Description, - lastUpdateTime) + err = checkVolumeResyncStatus(localStatus) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } resp := &replication.ResyncVolumeResponse{ Ready: ready, @@ -664,6 +668,25 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, return resp, nil } +func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error { + // we are considering 2 states to check resync started and resync completed + // as below. all other states will be considered as an error state so that + // cephCSI can return error message and volume replication operator can + // mark the VolumeReplication status as not resyncing for the volume. + + // If the state is Replaying means the resync is going on. + // Once the volume on remote cluster is demoted and resync + // is completed the image state will be moved to UNKNOWN . + if localStatus.State != librbd.MirrorImageStatusStateReplaying && + localStatus.State != librbd.MirrorImageStatusStateUnknown { + return fmt.Errorf( + "not resyncing. image is in %q state", + localStatus.State) + } + + return nil +} + // resyncRequired returns true if local image is in split-brain state and image // needs resync. func resyncRequired(localStatus librbd.SiteMirrorImageStatus) bool { diff --git a/internal/rbd/replicationcontrollerserver_test.go b/internal/rbd/replicationcontrollerserver_test.go index eb7de989f..1d49e8954 100644 --- a/internal/rbd/replicationcontrollerserver_test.go +++ b/internal/rbd/replicationcontrollerserver_test.go @@ -20,6 +20,7 @@ import ( "reflect" "testing" + librbd "github.com/ceph/go-ceph/rbd" "github.com/ceph/go-ceph/rbd/admin" ) @@ -175,3 +176,78 @@ func TestGetSchedulingDetails(t *testing.T) { }) } } + +func TestCheckVolumeResyncStatus(t *testing.T) { + t.Parallel() + tests := []struct { + name string + args librbd.SiteMirrorImageStatus + wantErr bool + }{ + { + name: "test for unknown state", + args: librbd.SiteMirrorImageStatus{ + State: librbd.MirrorImageStatusStateUnknown, + }, + wantErr: false, + }, + { + name: "test for error state", + args: librbd.SiteMirrorImageStatus{ + State: librbd.MirrorImageStatusStateError, + }, + wantErr: true, + }, + { + name: "test for syncing state", + args: librbd.SiteMirrorImageStatus{ + State: librbd.MirrorImageStatusStateSyncing, + }, + wantErr: true, + }, + { + name: "test for starting_replay state", + args: librbd.SiteMirrorImageStatus{ + State: librbd.MirrorImageStatusStateStartingReplay, + }, + wantErr: true, + }, + { + name: "test for replaying state", + args: librbd.SiteMirrorImageStatus{ + State: librbd.MirrorImageStatusStateReplaying, + }, + wantErr: false, + }, + { + name: "test for stopping_replay state", + args: librbd.SiteMirrorImageStatus{ + State: librbd.MirrorImageStatusStateStoppingReplay, + }, + wantErr: true, + }, + { + name: "test for stopped state", + args: librbd.SiteMirrorImageStatus{ + State: librbd.MirrorImageStatusStateStopped, + }, + wantErr: true, + }, + { + name: "test for invalid state", + args: librbd.SiteMirrorImageStatus{ + State: librbd.MirrorImageStatusState(100), + }, + wantErr: true, + }, + } + for _, tt := range tests { + ts := tt + t.Run(ts.name, func(t *testing.T) { + t.Parallel() + if err := checkVolumeResyncStatus(ts.args); (err != nil) != ts.wantErr { + t.Errorf("checkVolumeResyncStatus() error = %v, expect error = %v", err, ts.wantErr) + } + }) + } +}