mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-06-14 10:53:34 +00:00
rbdplugin: idempotent DeleteVolume
When the initial DeleteVolume times out (as it does on slow clusters due to the low 10 second limit), the external-provisioner calls it again. The CSI standard requires the second call to succeed if the volume has been deleted in the meantime. This didn't work because DeleteVolume returned an error when failing to find the volume info file: rbdplugin: E1008 08:05:35.631783 1 utils.go:100] GRPC error: rbd: open err /var/lib/kubelet/plugins/csi-rbdplugin/controller/csi-rbd-622a252c-cad0-11e8-9112-deadbeef0101.json/open /var/lib/kubelet/plugins/csi-rbdplugin/controller/csi-rbd-622a252c-cad0-11e8-9112-deadbeef0101.json: no such file or directory The fix is to treat a missing volume info file as "volume already deleted" and return success. To detect this, the original os error must be wrapped, otherwise the caller of loadVolInfo cannot determine the root cause. Note that further work may be needed to make the driver really resilient, for example there are probably concurrency issues. But for now this fixes: #82
This commit is contained in:
@ -18,6 +18,7 @@ package rbd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"syscall"
|
||||
@ -27,6 +28,7 @@ import (
|
||||
"github.com/golang/glog"
|
||||
"github.com/kubernetes-csi/drivers/pkg/csi-common"
|
||||
"github.com/pborman/uuid"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/net/context"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
@ -156,17 +158,24 @@ func (cs *controllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
|
||||
volumeID := req.GetVolumeId()
|
||||
rbdVol := &rbdVolume{}
|
||||
if err := loadVolInfo(volumeID, path.Join(PluginFolder, "controller"), rbdVol); err != nil {
|
||||
if os.IsNotExist(errors.Cause(err)) {
|
||||
// Must have been deleted already. This is not an error (idempotency!).
|
||||
return &csi.DeleteVolumeResponse{}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
volName := rbdVol.VolName
|
||||
// Deleting rbd image
|
||||
glog.V(4).Infof("deleting volume %s", volName)
|
||||
if err := deleteRBDImage(rbdVol, rbdVol.AdminId, req.GetControllerDeleteSecrets()); err != nil {
|
||||
// TODO: can we detect "already deleted" situations here and proceed?
|
||||
glog.V(3).Infof("failed to delete rbd image: %s/%s with error: %v", rbdVol.Pool, volName, err)
|
||||
return nil, err
|
||||
}
|
||||
// Removing persistent storage file for the unmapped volume
|
||||
if err := deleteVolInfo(volumeID, path.Join(PluginFolder, "controller")); err != nil {
|
||||
// TODO: we can theoretically end up here when two DeleteVolume calls
|
||||
// get invoked concurrently. Serialize?
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user