mirror of
https://github.com/ceph/ceph-csi.git
synced 2024-11-10 00:10:20 +00:00
rbd: consider mirror daemon state for ResyncVolume
During ResyncVolume we check whether the image is in an error state and, if so, resync it. After the resync is issued, the image moves to either the `Error` or the `Resyncing` state. If the image is in either of these two states, we return a successful response with Ready=false so that the consumer can wait until the volume is ready to use. If the image is in any other state, we return an error to indicate that syncing is not in progress. The whole resync and image state change depends on the rbd mirror daemon. If the mirror daemon is not running, the image can remain in the Resyncing or Unknown state. Ramen marks the volume replication as secondary and, once the resync starts, deletes the volume replication CR as part of its cleanup. Because we don't check the rbd mirror daemon, we were returning a resync success response with Ready=false. Due to this false response, Ramen assumed the resync had started and deleted the volume replication CR, which left the cluster in a bad state that needs manual intervention. fixes #3289 Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
This commit is contained in:
parent
1ea4a1b790
commit
8d7b6ee59f
@ -857,11 +857,12 @@ func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error {
|
||||
// If the state is Replaying means the resync is going on.
|
||||
// Once the volume on remote cluster is demoted and resync
|
||||
// is completed the image state will be moved to UNKNOWN.
|
||||
if localStatus.State != librbd.MirrorImageStatusStateReplaying &&
|
||||
localStatus.State != librbd.MirrorImageStatusStateUnknown {
|
||||
// RBD mirror daemon should be always running on the primary cluster.
|
||||
if !localStatus.Up || (localStatus.State != librbd.MirrorImageStatusStateReplaying &&
|
||||
localStatus.State != librbd.MirrorImageStatusStateUnknown) {
|
||||
return fmt.Errorf(
|
||||
"not resyncing. image is in %q state",
|
||||
localStatus.State)
|
||||
"not resyncing. Local status: daemon up=%t image is in %q state",
|
||||
localStatus.Up, localStatus.State)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -212,10 +212,19 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
|
||||
args librbd.SiteMirrorImageStatus
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "test when rbd mirror daemon is not running",
|
||||
args: librbd.SiteMirrorImageStatus{
|
||||
State: librbd.MirrorImageStatusStateUnknown,
|
||||
Up: false,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "test for unknown state",
|
||||
args: librbd.SiteMirrorImageStatus{
|
||||
State: librbd.MirrorImageStatusStateUnknown,
|
||||
Up: true,
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
@ -223,6 +232,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
|
||||
name: "test for error state",
|
||||
args: librbd.SiteMirrorImageStatus{
|
||||
State: librbd.MirrorImageStatusStateError,
|
||||
Up: true,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
@ -230,6 +240,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
|
||||
name: "test for syncing state",
|
||||
args: librbd.SiteMirrorImageStatus{
|
||||
State: librbd.MirrorImageStatusStateSyncing,
|
||||
Up: true,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
@ -237,6 +248,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
|
||||
name: "test for starting_replay state",
|
||||
args: librbd.SiteMirrorImageStatus{
|
||||
State: librbd.MirrorImageStatusStateStartingReplay,
|
||||
Up: true,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
@ -244,6 +256,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
|
||||
name: "test for replaying state",
|
||||
args: librbd.SiteMirrorImageStatus{
|
||||
State: librbd.MirrorImageStatusStateReplaying,
|
||||
Up: true,
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
@ -251,6 +264,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
|
||||
name: "test for stopping_replay state",
|
||||
args: librbd.SiteMirrorImageStatus{
|
||||
State: librbd.MirrorImageStatusStateStoppingReplay,
|
||||
Up: true,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
@ -258,6 +272,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
|
||||
name: "test for stopped state",
|
||||
args: librbd.SiteMirrorImageStatus{
|
||||
State: librbd.MirrorImageStatusStateStopped,
|
||||
Up: true,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
@ -265,6 +280,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
|
||||
name: "test for invalid state",
|
||||
args: librbd.SiteMirrorImageStatus{
|
||||
State: librbd.MirrorImageStatusState(100),
|
||||
Up: true,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user