rbd: check for peer site status

Resync if the image is in an unknown or error
state.

Check that the current image state is up+stopped
or up+replaying, and that every peer site status
is up+stopped, to confirm that resyncing
is done and the image can be promoted and used.

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
This commit is contained in:
Madhu Rajanna 2021-03-18 12:46:36 +05:30 committed by mergify[bot]
parent 233954bc10
commit 95387c3b5e
2 changed files with 27 additions and 6 deletions

View File

@ -118,6 +118,12 @@ type imageMirrorStatus struct {
State string `json:"state"` // rbd image state State string `json:"state"` // rbd image state
Description string `json:"description"` Description string `json:"description"`
LastUpdate string `json:"last_update"` LastUpdate string `json:"last_update"`
PeerSites []struct {
SiteName string `json:"site_name"`
State string `json:"state"`
Description string `json:"description"`
LastUpdate string `json:"last_update"`
} `json:"peer_sites"`
} }
// FIXME: once https://github.com/ceph/go-ceph/issues/460 is fixed use go-ceph. // FIXME: once https://github.com/ceph/go-ceph/issues/460 is fixed use go-ceph.

View File

@ -20,6 +20,7 @@ import (
"context" "context"
"errors" "errors"
"strconv" "strconv"
"strings"
"github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util"
@ -50,6 +51,9 @@ const (
// running and stopped means the image is not a target for replication from // running and stopped means the image is not a target for replication from
// another cluster // another cluster
upAndStopped imageMirroringState = "up+stopped" upAndStopped imageMirroringState = "up+stopped"
// An error state means the image needs a resync.
errorState imageMirroringState = "error"
) )
const ( const (
@ -417,12 +421,6 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
return nil, status.Error(codes.InvalidArgument, "image is in primary state") return nil, status.Error(codes.InvalidArgument, "image is in primary state")
} }
err = rbdVol.resyncImage()
if err != nil {
util.ErrorLog(ctx, err.Error())
return nil, status.Error(codes.Internal, err.Error())
}
// TODO: check the image state and return its ready to use or not
mirrorStatus, err := rbdVol.getImageMirroingStatus() mirrorStatus, err := rbdVol.getImageMirroingStatus()
if err != nil { if err != nil {
// the image gets recreated after issuing resync in that case return // the image gets recreated after issuing resync in that case return
@ -439,8 +437,25 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
ready := false ready := false
state := imageMirroringState(mirrorStatus.State) state := imageMirroringState(mirrorStatus.State)
if state == upAndStopped || state == upAndReplaying { if state == upAndStopped || state == upAndReplaying {
// Make sure the peer site image state is up and stopped
ready = true ready = true
for _, s := range mirrorStatus.PeerSites {
if imageMirroringState(s.State) != upAndStopped {
util.UsefulLog(ctx, "peer site name=%s mirroring state=%s, description=%s and lastUpdate=%s", s.SiteName, s.State, s.Description, s.LastUpdate)
ready = false
} }
}
}
// resync only if the image is in error state
if strings.Contains(mirrorStatus.State, string(errorState)) {
err = rbdVol.resyncImage()
if err != nil {
util.ErrorLog(ctx, err.Error())
return nil, status.Error(codes.Internal, err.Error())
}
}
util.UsefulLog(ctx, "image mirroring state=%s, description=%s and lastUpdate=%s", mirrorStatus.State, mirrorStatus.Description, mirrorStatus.LastUpdate) util.UsefulLog(ctx, "image mirroring state=%s, description=%s and lastUpdate=%s", mirrorStatus.State, mirrorStatus.Description, mirrorStatus.LastUpdate)
resp := &replication.ResyncVolumeResponse{ resp := &replication.ResyncVolumeResponse{
Ready: ready, Ready: ready,