rbd: include a delay and check for syncing status after resyncing

It may take some time for the RBD-mirror daemon to start syncing the
image. After the resync operation is executed, the status of the resync
is checked with a small delay to prevent subsequent resync calls from
restarting the resync in quick succession.

Signed-off-by: Niels de Vos <ndevos@ibm.com>
This commit is contained in:
Niels de Vos 2025-04-16 18:41:03 +02:00 committed by mergify[bot]
parent b0994a5356
commit 86576b4e11

View File

@ -18,6 +18,7 @@ package rbd
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
@ -209,8 +210,12 @@ func (rm *rbdMirror) Demote(_ context.Context) error {
return nil
}
// Resync resync image to correct the split-brain.
func (rm *rbdMirror) Resync(_ context.Context) error {
// Resync resyncs the image to correct a split-brain. It may take some time for
// the RBD-mirror daemon to start syncing the image. After the resync operation
// is executed, the status of the resync is checked with a small delay to
// prevent subsequent resync calls from restarting the resync in quick
// succession.
func (rm *rbdMirror) Resync(ctx context.Context) error {
image, err := rm.open()
if err != nil {
return fmt.Errorf("failed to open image %q with error: %w", rm, err)
@ -221,6 +226,43 @@ func (rm *rbdMirror) Resync(_ context.Context) error {
return fmt.Errorf("failed to resync image %q with error: %w", rm, err)
}
// delay until the state is syncing, or until 1+2+4+8+16 seconds passed
delay := 1 * time.Second
for {
time.Sleep(delay)
sts, dErr := rm.GetGlobalMirroringStatus(ctx)
if dErr != nil {
// the image gets recreated after issuing resync
if errors.Is(dErr, rbderrors.ErrImageNotFound) {
continue
}
log.ErrorLog(ctx, dErr.Error())
return dErr
}
localStatus, dErr := sts.GetLocalSiteStatus()
if dErr != nil {
log.ErrorLog(ctx, dErr.Error())
return fmt.Errorf("failed to get local status: %w", dErr)
}
syncInfo, dErr := localStatus.GetLastSyncInfo(ctx)
if dErr != nil {
return fmt.Errorf("failed to get last sync info: %w", dErr)
}
if syncInfo.IsSyncing() {
return nil
}
delay = 2 * delay
if delay > 30 {
break
}
}
// If we issued a resync, return a non-final error as image needs to be recreated
// locally. Caller retries till RBD syncs an initial version of the image to
// report its status in the resync request.