mirror of
https://github.com/ceph/ceph-csi.git
synced 2024-11-22 14:20:19 +00:00
rbd: fix resync issue
During the Demote volume store the image creation timestamp. During Resync do below operation * Check image creation timestamp stored during Demote operation and current creation timestamp during Resync and check both are equal and its for force resync then issue resync * If the image on both sides is not in unknown state, check last_snapshot_timestamp on the local mirror description, if its present send volumeReady as false or else return error message. If both the images are in up+unknown the send volumeReady as true. Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
This commit is contained in:
parent
4016876c9d
commit
e013cfed15
@ -50,6 +50,11 @@ const (
|
|||||||
// imageMirrorModeJournal uses journaling to propagate RBD images between
|
// imageMirrorModeJournal uses journaling to propagate RBD images between
|
||||||
// ceph clusters.
|
// ceph clusters.
|
||||||
imageMirrorModeJournal imageMirroringMode = "journal"
|
imageMirrorModeJournal imageMirroringMode = "journal"
|
||||||
|
|
||||||
|
// imageCreationTimeKey is the key to get/set the image creation timestamp
|
||||||
|
// on the image metadata. The key is starting with `.rbd` so that it will
|
||||||
|
// not get replicated to remote cluster.
|
||||||
|
imageCreationTimeKey = ".rbd.image.creation_time"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -480,6 +485,14 @@ func (rs *ReplicationServer) DemoteVolume(ctx context.Context,
|
|||||||
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
creationTime, err := rbdVol.GetImageCreationTime()
|
||||||
|
if err != nil {
|
||||||
|
log.ErrorLog(ctx, err.Error())
|
||||||
|
|
||||||
|
return nil, status.Error(codes.Internal, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
mirroringInfo, err := rbdVol.GetImageMirroringInfo()
|
mirroringInfo, err := rbdVol.GetImageMirroringInfo()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.ErrorLog(ctx, err.Error())
|
log.ErrorLog(ctx, err.Error())
|
||||||
@ -497,6 +510,17 @@ func (rs *ReplicationServer) DemoteVolume(ctx context.Context,
|
|||||||
|
|
||||||
// demote image to secondary
|
// demote image to secondary
|
||||||
if mirroringInfo.Primary {
|
if mirroringInfo.Primary {
|
||||||
|
// store the image creation time for resync
|
||||||
|
_, err = rbdVol.GetMetadata(imageCreationTimeKey)
|
||||||
|
if err != nil && errors.Is(err, librbd.ErrNotFound) {
|
||||||
|
err = rbdVol.SetMetadata(imageCreationTimeKey, timestampToString(creationTime))
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
log.ErrorLog(ctx, err.Error())
|
||||||
|
|
||||||
|
return nil, status.Error(codes.Internal, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
err = rbdVol.DemoteImage()
|
err = rbdVol.DemoteImage()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.ErrorLog(ctx, err.Error())
|
log.ErrorLog(ctx, err.Error())
|
||||||
@ -538,6 +562,8 @@ func checkRemoteSiteStatus(ctx context.Context, mirrorStatus *librbd.GlobalMirro
|
|||||||
// ResyncVolume extracts the RBD volume information from the volumeID, If the
|
// ResyncVolume extracts the RBD volume information from the volumeID, If the
|
||||||
// image is present, mirroring is enabled and the image is in demoted state.
|
// image is present, mirroring is enabled and the image is in demoted state.
|
||||||
// If yes it will resync the image to correct the split-brain.
|
// If yes it will resync the image to correct the split-brain.
|
||||||
|
//
|
||||||
|
//nolint:gocyclo,cyclop // TODO: reduce complexity
|
||||||
func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
|
func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
|
||||||
req *replication.ResyncVolumeRequest,
|
req *replication.ResyncVolumeRequest,
|
||||||
) (*replication.ResyncVolumeResponse, error) {
|
) (*replication.ResyncVolumeResponse, error) {
|
||||||
@ -578,7 +604,7 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
|
|||||||
// it takes time for this operation.
|
// it takes time for this operation.
|
||||||
log.ErrorLog(ctx, err.Error())
|
log.ErrorLog(ctx, err.Error())
|
||||||
|
|
||||||
return nil, status.Error(codes.Aborted, err.Error())
|
return nil, status.Errorf(codes.Aborted, err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
if mirroringInfo.State != librbd.MirrorImageEnabled {
|
if mirroringInfo.State != librbd.MirrorImageEnabled {
|
||||||
@ -637,15 +663,41 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
|
|||||||
ready = checkRemoteSiteStatus(ctx, mirrorStatus)
|
ready = checkRemoteSiteStatus(ctx, mirrorStatus)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = rbdVol.ResyncVol(localStatus, req.Force)
|
creationTime, err := rbdVol.GetImageCreationTime()
|
||||||
|
if err != nil {
|
||||||
|
return nil, status.Errorf(codes.Internal, "failed to get image info for %s: %s", rbdVol, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
// image creation time is stored in the image metadata. it looks like
|
||||||
|
// `"seconds:1692879841 nanos:631526669"`
|
||||||
|
savedImageTime, err := rbdVol.GetMetadata(imageCreationTimeKey)
|
||||||
|
if err != nil {
|
||||||
|
return nil, status.Errorf(codes.Internal,
|
||||||
|
"failed to get %s key from image metadata for %s: %s",
|
||||||
|
imageCreationTimeKey,
|
||||||
|
rbdVol,
|
||||||
|
err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
st, err := timestampFromString(savedImageTime)
|
||||||
|
if err != nil {
|
||||||
|
return nil, status.Errorf(codes.Internal, "failed to parse image creation time: %s", err.Error())
|
||||||
|
}
|
||||||
|
log.DebugLog(ctx, "image %s, savedImageTime=%v, currentImageTime=%v", rbdVol, st, creationTime.AsTime())
|
||||||
|
|
||||||
|
if req.Force && st.Equal(creationTime.AsTime()) {
|
||||||
|
err = rbdVol.ResyncVol(localStatus)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, getGRPCError(err)
|
return nil, getGRPCError(err)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !ready {
|
||||||
err = checkVolumeResyncStatus(localStatus)
|
err = checkVolumeResyncStatus(localStatus)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, status.Error(codes.Internal, err.Error())
|
return nil, status.Error(codes.Internal, err.Error())
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err = rbdVol.RepairResyncedImageID(ctx, ready)
|
err = rbdVol.RepairResyncedImageID(ctx, ready)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -659,6 +711,40 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
|
|||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// timestampToString converts the time.Time object to string.
|
||||||
|
func timestampToString(st *timestamppb.Timestamp) string {
|
||||||
|
return fmt.Sprintf("seconds:%d nanos:%d", st.Seconds, st.Nanos)
|
||||||
|
}
|
||||||
|
|
||||||
|
// timestampFromString parses the timestamp string and returns the time.Time
|
||||||
|
// object.
|
||||||
|
func timestampFromString(timestamp string) (time.Time, error) {
|
||||||
|
st := time.Time{}
|
||||||
|
parts := strings.Fields(timestamp)
|
||||||
|
if len(parts) != 2 {
|
||||||
|
return st, fmt.Errorf("failed to parse image creation time: %s", timestamp)
|
||||||
|
}
|
||||||
|
if len(strings.Split(parts[0], ":")) != 2 || len(strings.Split(parts[1], ":")) != 2 {
|
||||||
|
return st, fmt.Errorf("failed to parse image creation time: %s", timestamp)
|
||||||
|
}
|
||||||
|
secondsStr := strings.Split(parts[0], ":")[1]
|
||||||
|
nanosStr := strings.Split(parts[1], ":")[1]
|
||||||
|
|
||||||
|
seconds, err := strconv.ParseInt(secondsStr, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return st, fmt.Errorf("failed to parse image creation time seconds: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
nanos, err := strconv.ParseInt(nanosStr, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
return st, fmt.Errorf("failed to parse image creation time nenos: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
st = time.Unix(seconds, nanos)
|
||||||
|
|
||||||
|
return st, nil
|
||||||
|
}
|
||||||
|
|
||||||
func getGRPCError(err error) error {
|
func getGRPCError(err error) error {
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return status.Error(codes.OK, codes.OK.String())
|
return status.Error(codes.OK, codes.OK.String())
|
||||||
@ -854,20 +940,17 @@ func getLastSyncInfo(description string) (*replication.GetVolumeReplicationInfoR
|
|||||||
}
|
}
|
||||||
|
|
||||||
func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error {
|
func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error {
|
||||||
// we are considering 2 states to check resync started and resync completed
|
// we are considering local snapshot timestamp to check if the resync is
|
||||||
// as below. all other states will be considered as an error state so that
|
// started or not, if we dont see local_snapshot_timestamp in the
|
||||||
// cephCSI can return error message and volume replication operator can
|
// description of localStatus, we are returning error. if we see the local
|
||||||
// mark the VolumeReplication status as not resyncing for the volume.
|
// snapshot timestamp in the description we return resyncing started.
|
||||||
|
description := localStatus.Description
|
||||||
// If the state is Replaying means the resync is going on.
|
resp, err := getLastSyncInfo(description)
|
||||||
// Once the volume on remote cluster is demoted and resync
|
if err != nil {
|
||||||
// is completed the image state will be moved to UNKNOWN.
|
return fmt.Errorf("failed to get last sync info: %w", err)
|
||||||
// RBD mirror daemon should be always running on the primary cluster.
|
}
|
||||||
if !localStatus.Up || (localStatus.State != librbd.MirrorImageStatusStateReplaying &&
|
if resp.LastSyncTime == nil {
|
||||||
localStatus.State != librbd.MirrorImageStatusStateUnknown) {
|
return errors.New("last sync time is nil")
|
||||||
return fmt.Errorf(
|
|
||||||
"not resyncing. Local status: daemon up=%t image is in %q state",
|
|
||||||
localStatus.Up, localStatus.State)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -225,74 +225,26 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
|
|||||||
wantErr bool
|
wantErr bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "test when rbd mirror daemon is not running",
|
name: "test when local_snapshot_timestamp is non zero",
|
||||||
args: librbd.SiteMirrorImageStatus{
|
args: librbd.SiteMirrorImageStatus{
|
||||||
State: librbd.MirrorImageStatusStateUnknown,
|
//nolint:lll // sample output cannot be split into multiple lines.
|
||||||
Up: false,
|
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":1684675261,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
|
||||||
},
|
|
||||||
wantErr: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "test for unknown state",
|
|
||||||
args: librbd.SiteMirrorImageStatus{
|
|
||||||
State: librbd.MirrorImageStatusStateUnknown,
|
|
||||||
Up: true,
|
|
||||||
},
|
},
|
||||||
wantErr: false,
|
wantErr: false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "test for error state",
|
name: "test when local_snapshot_timestamp is zero",
|
||||||
|
//nolint:lll // sample output cannot be split into multiple lines.
|
||||||
args: librbd.SiteMirrorImageStatus{
|
args: librbd.SiteMirrorImageStatus{
|
||||||
State: librbd.MirrorImageStatusStateError,
|
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":0,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
|
||||||
Up: true,
|
|
||||||
},
|
},
|
||||||
wantErr: true,
|
wantErr: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "test for syncing state",
|
name: "test when local_snapshot_timestamp is not present",
|
||||||
|
//nolint:lll // sample output cannot be split into multiple lines.
|
||||||
args: librbd.SiteMirrorImageStatus{
|
args: librbd.SiteMirrorImageStatus{
|
||||||
State: librbd.MirrorImageStatusStateSyncing,
|
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
|
||||||
Up: true,
|
|
||||||
},
|
|
||||||
wantErr: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "test for starting_replay state",
|
|
||||||
args: librbd.SiteMirrorImageStatus{
|
|
||||||
State: librbd.MirrorImageStatusStateStartingReplay,
|
|
||||||
Up: true,
|
|
||||||
},
|
|
||||||
wantErr: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "test for replaying state",
|
|
||||||
args: librbd.SiteMirrorImageStatus{
|
|
||||||
State: librbd.MirrorImageStatusStateReplaying,
|
|
||||||
Up: true,
|
|
||||||
},
|
|
||||||
wantErr: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "test for stopping_replay state",
|
|
||||||
args: librbd.SiteMirrorImageStatus{
|
|
||||||
State: librbd.MirrorImageStatusStateStoppingReplay,
|
|
||||||
Up: true,
|
|
||||||
},
|
|
||||||
wantErr: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "test for stopped state",
|
|
||||||
args: librbd.SiteMirrorImageStatus{
|
|
||||||
State: librbd.MirrorImageStatusStateStopped,
|
|
||||||
Up: true,
|
|
||||||
},
|
|
||||||
wantErr: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "test for invalid state",
|
|
||||||
args: librbd.SiteMirrorImageStatus{
|
|
||||||
State: librbd.MirrorImageStatusState(100),
|
|
||||||
Up: true,
|
|
||||||
},
|
},
|
||||||
wantErr: true,
|
wantErr: true,
|
||||||
},
|
},
|
||||||
@ -644,3 +596,64 @@ func TestGetGRPCError(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Test_timestampFromString(t *testing.T) {
|
||||||
|
tm := timestamppb.Now()
|
||||||
|
t.Parallel()
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
timestamp string
|
||||||
|
want time.Time
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid timestamp",
|
||||||
|
timestamp: timestampToString(tm),
|
||||||
|
want: tm.AsTime().Local(),
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid timestamp",
|
||||||
|
timestamp: "invalid",
|
||||||
|
want: time.Time{},
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty timestamp",
|
||||||
|
timestamp: "",
|
||||||
|
want: time.Time{},
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid format",
|
||||||
|
timestamp: "seconds:%d nanos:%d",
|
||||||
|
want: time.Time{},
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing nanos",
|
||||||
|
timestamp: "seconds:10",
|
||||||
|
want: time.Time{},
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing seconds",
|
||||||
|
timestamp: "nanos:0",
|
||||||
|
want: time.Time{},
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
tt := tt
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
got, err := timestampFromString(tt.timestamp)
|
||||||
|
if (err != nil) != tt.wantErr {
|
||||||
|
t.Errorf("timestampFromString() error = %v, wantErr %v", err, tt.wantErr)
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(got, tt.want) {
|
||||||
|
t.Errorf("timestampFromString() = %v, want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1558,6 +1558,19 @@ func (rv *rbdVolume) setImageOptions(ctx context.Context, options *librbd.ImageO
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetImageCreationTime returns the creation time of the image. if the image
|
||||||
|
// creation time is not set, it queries the image info and returns the creation time.
|
||||||
|
func (ri *rbdImage) GetImageCreationTime() (*timestamppb.Timestamp, error) {
|
||||||
|
if ri.CreatedAt != nil {
|
||||||
|
return ri.CreatedAt, nil
|
||||||
|
}
|
||||||
|
if err := ri.getImageInfo(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return ri.CreatedAt, nil
|
||||||
|
}
|
||||||
|
|
||||||
// getImageInfo queries rbd about the given image and returns its metadata, and returns
|
// getImageInfo queries rbd about the given image and returns its metadata, and returns
|
||||||
// ErrImageNotFound if provided image is not found.
|
// ErrImageNotFound if provided image is not found.
|
||||||
func (ri *rbdImage) getImageInfo() error {
|
func (ri *rbdImage) getImageInfo() error {
|
||||||
|
@ -19,21 +19,12 @@ package rbd
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
|
||||||
|
|
||||||
librbd "github.com/ceph/go-ceph/rbd"
|
librbd "github.com/ceph/go-ceph/rbd"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (rv *rbdVolume) ResyncVol(localStatus librbd.SiteMirrorImageStatus, force bool) error {
|
func (rv *rbdVolume) ResyncVol(localStatus librbd.SiteMirrorImageStatus) error {
|
||||||
if resyncRequired(localStatus) {
|
if err := rv.resyncImage(); err != nil {
|
||||||
// If the force option is not set return the error message to retry
|
|
||||||
// with Force option.
|
|
||||||
if !force {
|
|
||||||
return fmt.Errorf("%w: image is in %q state, description (%s). Force resync to recover volume",
|
|
||||||
ErrFailedPrecondition, localStatus.State, localStatus.Description)
|
|
||||||
}
|
|
||||||
err := rv.resyncImage()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("%w: failed to resync image: %w", ErrResyncImageFailed, err)
|
return fmt.Errorf("%w: failed to resync image: %w", ErrResyncImageFailed, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -41,9 +32,6 @@ func (rv *rbdVolume) ResyncVol(localStatus librbd.SiteMirrorImageStatus, force b
|
|||||||
// locally. Caller retries till RBD syncs an initial version of the image to
|
// locally. Caller retries till RBD syncs an initial version of the image to
|
||||||
// report its status in the resync request.
|
// report its status in the resync request.
|
||||||
return fmt.Errorf("%w: awaiting initial resync due to split brain", ErrUnavailable)
|
return fmt.Errorf("%w: awaiting initial resync due to split brain", ErrUnavailable)
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// repairResyncedImageID updates the existing image ID with new one.
|
// repairResyncedImageID updates the existing image ID with new one.
|
||||||
@ -66,22 +54,6 @@ func (rv *rbdVolume) RepairResyncedImageID(ctx context.Context, ready bool) erro
|
|||||||
return rv.repairImageID(ctx, j, true)
|
return rv.repairImageID(ctx, j, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// resyncRequired returns true if local image is in split-brain state and image
|
|
||||||
// needs resync.
|
|
||||||
func resyncRequired(localStatus librbd.SiteMirrorImageStatus) bool {
|
|
||||||
// resync is required if the image is in error state or the description
|
|
||||||
// contains split-brain message.
|
|
||||||
// In some corner cases like `re-player shutdown` the local image will not
|
|
||||||
// be in an error state. It would be also worth considering the `description`
|
|
||||||
// field to make sure about split-brain.
|
|
||||||
if localStatus.State == librbd.MirrorImageStatusStateError ||
|
|
||||||
strings.Contains(localStatus.Description, "split-brain") {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (rv *rbdVolume) DisableVolumeReplication(
|
func (rv *rbdVolume) DisableVolumeReplication(
|
||||||
mirroringInfo *librbd.MirrorImageInfo,
|
mirroringInfo *librbd.MirrorImageInfo,
|
||||||
force bool,
|
force bool,
|
||||||
|
Loading…
Reference in New Issue
Block a user