mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-05-21 14:56:41 +00:00
rbd: include a delay and check for syncing
status after resyncing
It may take some time for the RBD-mirror daemon to start syncing the image. After the resync operation is executed, the status of the resync is checked with a small delay to prevent subsequent resync calls from re-starting the resync quickly after each other. Signed-off-by: Niels de Vos <ndevos@ibm.com>
This commit is contained in:
parent
b0994a5356
commit
86576b4e11
@ -18,6 +18,7 @@ package rbd
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
@ -209,8 +210,12 @@ func (rm *rbdMirror) Demote(_ context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Resync resync image to correct the split-brain.
|
||||
func (rm *rbdMirror) Resync(_ context.Context) error {
|
||||
// Resync resync image to correct the split-brain. It may take some time for
|
||||
// the RBD-mirror daemon to start syncing the image. After the resync operation
|
||||
// is executed, the status of the resync is checked with a small delay to
|
||||
// prevent subsequent resync calls from re-starting the resync quickly after
|
||||
// each other.
|
||||
func (rm *rbdMirror) Resync(ctx context.Context) error {
|
||||
image, err := rm.open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open image %q with error: %w", rm, err)
|
||||
@ -221,6 +226,43 @@ func (rm *rbdMirror) Resync(_ context.Context) error {
|
||||
return fmt.Errorf("failed to resync image %q with error: %w", rm, err)
|
||||
}
|
||||
|
||||
// delay until the state is syncing, or until 1+2+4+8+16 seconds passed
|
||||
delay := 1 * time.Second
|
||||
for {
|
||||
time.Sleep(delay)
|
||||
|
||||
sts, dErr := rm.GetGlobalMirroringStatus(ctx)
|
||||
if dErr != nil {
|
||||
// the image gets recreated after issuing resync
|
||||
if errors.Is(dErr, rbderrors.ErrImageNotFound) {
|
||||
continue
|
||||
}
|
||||
log.ErrorLog(ctx, dErr.Error())
|
||||
|
||||
return dErr
|
||||
}
|
||||
|
||||
localStatus, dErr := sts.GetLocalSiteStatus()
|
||||
if dErr != nil {
|
||||
log.ErrorLog(ctx, dErr.Error())
|
||||
|
||||
return fmt.Errorf("failed to get local status: %w", dErr)
|
||||
}
|
||||
|
||||
syncInfo, dErr := localStatus.GetLastSyncInfo(ctx)
|
||||
if dErr != nil {
|
||||
return fmt.Errorf("failed to get last sync info: %w", dErr)
|
||||
}
|
||||
if syncInfo.IsSyncing() {
|
||||
return nil
|
||||
}
|
||||
|
||||
delay = 2 * delay
|
||||
if delay > 30 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If we issued a resync, return a non-final error as image needs to be recreated
|
||||
// locally. Caller retries till RBD syncs an initial version of the image to
|
||||
// report its status in the resync request.
|
||||
|
Loading…
x
Reference in New Issue
Block a user