rbdplugin: idempotent DeleteVolume

When the initial DeleteVolume times out (as it does on slow clusters
due to the low 10 second limit), the external-provisioner calls it
again. The CSI standard requires the second call to succeed if the
volume has been deleted in the meantime. This didn't work because
DeleteVolume returned an error when failing to find the volume info
file:

  rbdplugin: E1008 08:05:35.631783       1 utils.go:100] GRPC error: rbd: open err /var/lib/kubelet/plugins/csi-rbdplugin/controller/csi-rbd-622a252c-cad0-11e8-9112-deadbeef0101.json/open /var/lib/kubelet/plugins/csi-rbdplugin/controller/csi-rbd-622a252c-cad0-11e8-9112-deadbeef0101.json: no such file or directory

The fix is to treat a missing volume info file as "volume already
deleted" and return success. To detect this, the original os error
must be wrapped, otherwise the caller of loadVolInfo cannot determine
the root cause.

Note that further work may be needed to make the driver really
resilient, for example there are probably concurrency issues.
But for now this fixes: #82
This commit is contained in:
Patrick Ohly
2018-10-09 12:08:56 +02:00
parent 239f295dd1
commit 25e3a961c3
15 changed files with 1941 additions and 16 deletions

View File

@ -18,6 +18,7 @@ package rbd
import (
"fmt"
"os"
"os/exec"
"path"
"syscall"
@ -27,6 +28,7 @@ import (
"github.com/golang/glog"
"github.com/kubernetes-csi/drivers/pkg/csi-common"
"github.com/pborman/uuid"
"github.com/pkg/errors"
"golang.org/x/net/context"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@ -156,17 +158,24 @@ func (cs *controllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
volumeID := req.GetVolumeId()
rbdVol := &rbdVolume{}
if err := loadVolInfo(volumeID, path.Join(PluginFolder, "controller"), rbdVol); err != nil {
if os.IsNotExist(errors.Cause(err)) {
// Must have been deleted already. This is not an error (idempotency!).
return &csi.DeleteVolumeResponse{}, nil
}
return nil, err
}
volName := rbdVol.VolName
// Deleting rbd image
glog.V(4).Infof("deleting volume %s", volName)
if err := deleteRBDImage(rbdVol, rbdVol.AdminId, req.GetControllerDeleteSecrets()); err != nil {
// TODO: can we detect "already deleted" situations here and proceed?
glog.V(3).Infof("failed to delete rbd image: %s/%s with error: %v", rbdVol.Pool, volName, err)
return nil, err
}
// Removing persistent storage file for the unmapped volume
if err := deleteVolInfo(volumeID, path.Join(PluginFolder, "controller")); err != nil {
// TODO: we can theoretically end up here when two DeleteVolume calls
// get invoked concurrently. Serialize?
return nil, err
}