ceph-csi/pkg/cephfs/nodeserver.go
ShyamsundarR c4a3675cec Move locks to more granular locking than CPU count based
As detailed in issue #279, the current lock scheme uses a set of hash
buckets whose count equals the number of CPUs. This causes a lot of
contention when parallel requests are made to the CSI plugin. To reduce
lock contention, this commit introduces granular, per-identifier locks.

The commit also changes the timeout for gRPC requests to create and
delete volumes, as the current timeout is 10s (the Kubernetes
documentation says 15s, but the code defaults to 10s). A virtual
setup can take about 12-15s to complete a request, which leads to
unwanted retries of the same request; the timeout is therefore
increased so operations can complete with minimal retries.

Tests creating and deleting PVCs before and after these changes look like this:

Before:
Default master code + sidecar provisioner --timeout option set
to 30 seconds

20 PVCs
Creation: 3 runs, 396/391/400 seconds
Deletion: 3 runs, 218/271/118 seconds
  - One run stalled for more than 8 minutes and was cancelled

After:
Current commit + sidecar provisioner --timeout option set to 30 sec
20 PVCs
Creation: 3 runs, 42/59/65 seconds
Deletion: 3 runs, 32/32/31 seconds

Fixes: #279
Signed-off-by: ShyamsundarR <srangana@redhat.com>
2019-07-01 14:10:14 +00:00
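For reference, the per-identifier locking that replaces the CPU-count-sized
hash buckets is used in the node server below as follows (a minimal sketch of
the pattern only; the IDLocker helper lives in pkg/util and is shown here
exactly as this file uses it):

	// One lock per volume ID instead of a fixed, CPU-count-sized set of hash buckets.
	var nodeVolumeIDLocker = util.NewIDLocker()

	// Inside a node RPC handler: serialize all operations on the same volume ID.
	idLk := nodeVolumeIDLocker.Lock(string(volID))
	defer nodeVolumeIDLocker.Unlock(idLk, string(volID))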


/*
Copyright 2018 The Ceph-CSI Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cephfs

import (
	"context"
	"fmt"
	"os"

	csicommon "github.com/ceph/ceph-csi/pkg/csi-common"
	"github.com/ceph/ceph-csi/pkg/util"
	"github.com/container-storage-interface/spec/lib/go/csi"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	"k8s.io/klog"
)

// NodeServer struct of ceph CSI driver with supported methods of CSI
// node server spec.
type NodeServer struct {
	*csicommon.DefaultNodeServer
}
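
// nodeVolumeIDLocker serializes node operations on the same volume ID,
// replacing the earlier CPU-count-sized hash-bucket locks (see issue #279).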
var (
	nodeVolumeIDLocker = util.NewIDLocker()
)
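
// getCredentialsForVolume returns the Ceph credentials needed to mount the
// volume. For dynamically provisioned volumes, the user credentials are
// fetched from the cluster using the admin credentials in the node stage
// secrets; for pre-made (static) volumes, the user credentials are read from
// the node stage secrets directly.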
func getCredentialsForVolume(volOptions *volumeOptions, volID volumeID, req *csi.NodeStageVolumeRequest) (*util.Credentials, error) {
	var (
		cr      *util.Credentials
		secrets = req.GetSecrets()
	)

	if volOptions.ProvisionVolume {
		// The volume is provisioned dynamically, get the credentials directly from Ceph
		// First, get admin credentials - those are needed for retrieving the user credentials
		adminCr, err := util.GetAdminCredentials(secrets)
		if err != nil {
			return nil, fmt.Errorf("failed to get admin credentials from node stage secrets: %v", err)
		}

		// Then get the ceph user
		entity, err := getCephUser(volOptions, adminCr, volID)
		if err != nil {
			return nil, fmt.Errorf("failed to get ceph user: %v", err)
		}

		cr = entity.toCredentials()
	} else {
		// The volume is pre-made, credentials are in node stage secrets
		userCr, err := util.GetUserCredentials(req.GetSecrets())
		if err != nil {
			return nil, fmt.Errorf("failed to get user credentials from node stage secrets: %v", err)
		}

		cr = userCr
	}

	return cr, nil
}

// NodeStageVolume mounts the volume to a staging path on the node.
func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error) {
	var (
		volOptions *volumeOptions
		vid        *volumeIdentifier
	)

	if err := validateNodeStageVolumeRequest(req); err != nil {
		return nil, status.Error(codes.InvalidArgument, err.Error())
	}

	// Configuration
	stagingTargetPath := req.GetStagingTargetPath()
	volID := volumeID(req.GetVolumeId())

	volOptions, vid, err := newVolumeOptionsFromVolID(string(volID), req.GetVolumeContext(), req.GetSecrets())
	if err != nil {
		if _, ok := err.(ErrInvalidVolID); !ok {
			return nil, status.Error(codes.Internal, err.Error())
		}

		// check for pre-provisioned volumes (plugin versions > 1.0.0)
		volOptions, vid, err = newVolumeOptionsFromStaticVolume(string(volID), req.GetVolumeContext())
		if err != nil {
			if _, ok := err.(ErrNonStaticVolume); !ok {
				return nil, status.Error(codes.Internal, err.Error())
			}

			// check for volumes from plugin versions <= 1.0.0
			volOptions, vid, err = newVolumeOptionsFromVersion1Context(string(volID), req.GetVolumeContext(),
				req.GetSecrets())
			if err != nil {
				return nil, status.Error(codes.Internal, err.Error())
			}
		}
	}

	if err = createMountPoint(stagingTargetPath); err != nil {
		klog.Errorf("failed to create staging mount point at %s for volume %s: %v", stagingTargetPath, volID, err)
		return nil, status.Error(codes.Internal, err.Error())
	}

	idLk := nodeVolumeIDLocker.Lock(string(volID))
	defer nodeVolumeIDLocker.Unlock(idLk, string(volID))

	// Check if the volume is already mounted
	isMnt, err := isMountPoint(stagingTargetPath)
	if err != nil {
		klog.Errorf("stat failed: %v", err)
		return nil, status.Error(codes.Internal, err.Error())
	}

	if isMnt {
		klog.Infof("cephfs: volume %s is already mounted to %s, skipping", volID, stagingTargetPath)
		return &csi.NodeStageVolumeResponse{}, nil
	}

	// It's not, mount now
	if err = ns.mount(volOptions, req, vid); err != nil {
		return nil, err
	}

	klog.Infof("cephfs: successfully mounted volume %s to %s", volID, stagingTargetPath)

	return &csi.NodeStageVolumeResponse{}, nil
}

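// mount resolves the credentials and the mounter for the volume and mounts it
// onto the staging path.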
func (*NodeServer) mount(volOptions *volumeOptions, req *csi.NodeStageVolumeRequest, vid *volumeIdentifier) error {
	stagingTargetPath := req.GetStagingTargetPath()
	volID := volumeID(req.GetVolumeId())

	cr, err := getCredentialsForVolume(volOptions, volumeID(vid.FsSubvolName), req)
	if err != nil {
		klog.Errorf("failed to get ceph credentials for volume %s: %v", volID, err)
		return status.Error(codes.Internal, err.Error())
	}

	m, err := newMounter(volOptions)
	if err != nil {
		klog.Errorf("failed to create mounter for volume %s: %v", volID, err)
		return status.Error(codes.Internal, err.Error())
	}

	klog.V(4).Infof("cephfs: mounting volume %s with %s", volID, m.name())

	if err = m.mount(stagingTargetPath, cr, volOptions); err != nil {
		klog.Errorf("failed to mount volume %s: %v", volID, err)
		return status.Error(codes.Internal, err.Error())
	}

	if err := volumeMountCache.nodeStageVolume(req.GetVolumeId(), stagingTargetPath, volOptions.Mounter, req.GetSecrets()); err != nil {
		klog.Warningf("mount-cache: failed to stage volume %s %s: %v", volID, stagingTargetPath, err)
	}

	return nil
}

// NodePublishVolume mounts the volume mounted to the staging path to the target
// path
func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
	mountOptions := []string{"bind"}
	if err := validateNodePublishVolumeRequest(req); err != nil {
		return nil, status.Error(codes.InvalidArgument, err.Error())
	}

	// Configuration
	targetPath := req.GetTargetPath()
	volID := req.GetVolumeId()

	if err := createMountPoint(targetPath); err != nil {
		klog.Errorf("failed to create mount point at %s: %v", targetPath, err)
		return nil, status.Error(codes.Internal, err.Error())
	}

	volCap := req.GetVolumeCapability()

	if req.GetReadonly() {
		mountOptions = append(mountOptions, "ro")
	}

	if m := volCap.GetMount(); m != nil {
		hasOption := func(options []string, opt string) bool {
			for _, o := range options {
				if o == opt {
					return true
				}
			}
			return false
		}
		for _, f := range m.MountFlags {
			if !hasOption(mountOptions, f) {
				mountOptions = append(mountOptions, f)
			}
		}
	}

	// Check if the volume is already mounted
	isMnt, err := isMountPoint(targetPath)
	if err != nil {
		klog.Errorf("stat failed: %v", err)
		return nil, status.Error(codes.Internal, err.Error())
	}

	if isMnt {
		klog.Infof("cephfs: volume %s is already bind-mounted to %s", volID, targetPath)
		return &csi.NodePublishVolumeResponse{}, nil
	}

	// It's not, mount now
	if err = bindMount(req.GetStagingTargetPath(), req.GetTargetPath(), req.GetReadonly(), mountOptions); err != nil {
		klog.Errorf("failed to bind-mount volume %s: %v", volID, err)
		return nil, status.Error(codes.Internal, err.Error())
	}

	if err = volumeMountCache.nodePublishVolume(volID, targetPath, req.GetReadonly()); err != nil {
		klog.Warningf("mount-cache: failed to publish volume %s %s: %v", volID, targetPath, err)
	}

	klog.Infof("cephfs: successfully bind-mounted volume %s to %s", volID, targetPath)

	err = os.Chmod(targetPath, 0777)
	if err != nil {
		klog.Errorf("failed to change targetpath permission for volume %s: %v", volID, err)
		return nil, status.Error(codes.Internal, err.Error())
	}

	return &csi.NodePublishVolumeResponse{}, nil
}

// NodeUnpublishVolume unmounts the volume from the target path
func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) {
	var err error
	if err = validateNodeUnpublishVolumeRequest(req); err != nil {
		return nil, status.Error(codes.InvalidArgument, err.Error())
	}

	targetPath := req.GetTargetPath()
	volID := req.GetVolumeId()

	if err = volumeMountCache.nodeUnPublishVolume(volID, targetPath); err != nil {
		klog.Warningf("mount-cache: failed to unpublish volume %s %s: %v", volID, targetPath, err)
	}

	// Unmount the bind-mount
	if err = unmountVolume(targetPath); err != nil {
		return nil, status.Error(codes.Internal, err.Error())
	}

	if err = os.Remove(targetPath); err != nil {
		return nil, status.Error(codes.Internal, err.Error())
	}

	klog.Infof("cephfs: successfully unbound volume %s from %s", req.GetVolumeId(), targetPath)

	return &csi.NodeUnpublishVolumeResponse{}, nil
}

// NodeUnstageVolume unstages the volume from the staging path
func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error) {
	var err error
	if err = validateNodeUnstageVolumeRequest(req); err != nil {
		return nil, status.Error(codes.InvalidArgument, err.Error())
	}

	stagingTargetPath := req.GetStagingTargetPath()
	volID := req.GetVolumeId()

	if err = volumeMountCache.nodeUnStageVolume(volID); err != nil {
		klog.Warningf("mount-cache: failed to unstage volume %s %s: %v", volID, stagingTargetPath, err)
	}

	// Unmount the volume
	if err = unmountVolume(stagingTargetPath); err != nil {
		return nil, status.Error(codes.Internal, err.Error())
	}

	if err = os.Remove(stagingTargetPath); err != nil {
		return nil, status.Error(codes.Internal, err.Error())
	}

	klog.Infof("cephfs: successfully unmounted volume %s from %s", req.GetVolumeId(), stagingTargetPath)

	return &csi.NodeUnstageVolumeResponse{}, nil
}

// NodeGetCapabilities returns the supported capabilities of the node server
func (ns *NodeServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCapabilitiesRequest) (*csi.NodeGetCapabilitiesResponse, error) {
	return &csi.NodeGetCapabilitiesResponse{
		Capabilities: []*csi.NodeServiceCapability{
			{
				Type: &csi.NodeServiceCapability_Rpc{
					Rpc: &csi.NodeServiceCapability_RPC{
						Type: csi.NodeServiceCapability_RPC_STAGE_UNSTAGE_VOLUME,
					},
				},
			},
		},
	}, nil
}