rbd: consider remote image health for primary

Currently, to decide whether the image is healthy during
the Promote operation, we check only the image state on
the primary site. If the network is flaky or the remote
site is down, the image health may not be as expected.
To make sure the image is healthy across the clusters,
check the state on both the local and the remote
clusters.

some details:
https://bugzilla.redhat.com/show_bug.cgi?id=2014495

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
This commit is contained in:
Madhu Rajanna 2022-04-11 10:05:07 +05:30 committed by mergify-bot
parent b64c7583a9
commit c3c87f2ef3

View File

@ -615,8 +615,9 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context,
}
// checkHealthyPrimary checks if the image is a healthy primary or not.
// healthy primary image will be in up+stopped state, for states other
// than this it returns an error message.
// healthy primary image will be in up+stopped state in local cluster and
// up+replaying in the remote clusters, for states other than this it returns
// an error message.
func checkHealthyPrimary(ctx context.Context, rbdVol *rbdVolume) error {
mirrorStatus, err := rbdVol.getImageMirroringStatus()
if err != nil {
@ -640,6 +641,26 @@ func checkHealthyPrimary(ctx context.Context, rbdVol *rbdVolume) error {
localStatus.State)
}
// Remote image should be in up+replaying state.
for _, s := range mirrorStatus.SiteStatuses {
log.UsefulLog(
ctx,
"peer site mirrorUUID=%q, daemon up=%t, mirroring state=%q, description=%q and lastUpdate=%d",
s.MirrorUUID,
s.Up,
s.State,
s.Description,
s.LastUpdate)
if s.MirrorUUID != "" {
if !s.Up && s.State != librbd.MirrorImageStatusStateReplaying {
return fmt.Errorf("remote image %s is not healthy. State is up=%t, state=%q",
rbdVol,
s.Up,
s.State)
}
}
}
return nil
}