/*
Copyright 2018 The Ceph-CSI Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package rbd

import (
	"fmt"
	"os"
	"os/exec"
	"regexp"
	"strings"

	csicommon "github.com/ceph/ceph-csi/pkg/csi-common"
	"github.com/ceph/ceph-csi/pkg/util"

	"github.com/container-storage-interface/spec/lib/go/csi"
	"golang.org/x/net/context"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	"k8s.io/klog"
	"k8s.io/kubernetes/pkg/util/mount"
)

// NodeServer struct of ceph rbd driver with supported methods of CSI
// node server spec
type NodeServer struct {
	*csicommon.DefaultNodeServer
	mounter mount.Interface
}

// NodePublishVolume mounts the volume mounted to the device path to the target
// path
func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
	targetPath := req.GetTargetPath()
	if targetPath == "" {
		return nil, status.Error(codes.InvalidArgument, "empty target path in request")
	}

	if req.GetVolumeCapability() == nil {
		return nil, status.Error(codes.InvalidArgument, "empty volume capability in request")
	}

	if req.GetVolumeId() == "" {
		return nil, status.Error(codes.InvalidArgument, "empty volume ID in request")
	}

	cr, err := util.GetUserCredentials(req.GetSecrets())
	if err != nil {
		return nil, status.Error(codes.Internal, err.Error())
	}

	idLk := targetPathLocker.Lock(targetPath)
	defer targetPathLocker.Unlock(idLk, targetPath)

	disableInUseChecks := false

	isBlock := req.GetVolumeCapability().GetBlock() != nil
	// Check if that target path exists properly
	notMnt, err := ns.createTargetPath(targetPath, isBlock)
	if err != nil {
		return nil, err
	}

	if !notMnt {
		return &csi.NodePublishVolumeResponse{}, nil
	}

	// MULTI_NODE_MULTI_WRITER is supported by default for Block access type volumes
	if req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER {
		if isBlock {
			disableInUseChecks = true
		} else {
			klog.Warningf("MULTI_NODE_MULTI_WRITER currently only supported with volumes of access type `block`, invalid AccessMode for volume: %v", req.GetVolumeId())
			return nil, status.Error(codes.InvalidArgument, "rbd: RWX access mode request is only valid for volumes with access type `block`")
		}
	}

	volOptions, err := genVolFromVolumeOptions(req.GetVolumeContext(), disableInUseChecks)
	if err != nil {
		return nil, err
	}

	volName, err := ns.getVolumeName(req)
	if err != nil {
		return nil, err
	}

	volOptions.RbdImageName = volName

	// Mapping RBD image
	devicePath, err := attachRBDImage(volOptions, cr)
	if err != nil {
		return nil, err
	}
	klog.V(4).Infof("rbd image: %s/%s was successfully mapped at %s\n", req.GetVolumeId(), volOptions.Pool, devicePath)

	// Publish Path
	err = ns.mountVolume(req, devicePath)
	if err != nil {
		return nil, err
	}

	err = os.Chmod(targetPath, 0777)
	if err != nil {
		return nil, status.Error(codes.Internal, err.Error())
	}

	return &csi.NodePublishVolumeResponse{}, nil
}
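
// getVolumeName decodes the CSI volume ID and derives the backing RBD image
// name by prepending the journal naming prefix to the object UUID.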
func (ns *NodeServer) getVolumeName(req *csi.NodePublishVolumeRequest) (string, error) {
	var vi util.CSIIdentifier
	err := vi.DecomposeCSIID(req.GetVolumeId())
	if err != nil {
		klog.Errorf("error decoding volume ID (%s) (%s)", err, req.GetVolumeId())
		return "", status.Error(codes.InvalidArgument, err.Error())
	}

	return volJournal.NamingPrefix() + vi.ObjectUUID, nil
}
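
// mountVolume publishes the mapped device at the target path: block volumes
// are bind mounted, filesystem volumes are formatted (if needed) and mounted.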
func (ns *NodeServer) mountVolume(req *csi.NodePublishVolumeRequest, devicePath string) error {
	// Publish Path
	fsType := req.GetVolumeCapability().GetMount().GetFsType()
	readOnly := req.GetReadonly()
	attrib := req.GetVolumeContext()
	mountFlags := req.GetVolumeCapability().GetMount().GetMountFlags()
	isBlock := req.GetVolumeCapability().GetBlock() != nil
	targetPath := req.GetTargetPath()

	klog.V(4).Infof("target %v\nisBlock %v\nfstype %v\ndevice %v\nreadonly %v\nattributes %v\n mountflags %v\n",
		targetPath, isBlock, fsType, devicePath, readOnly, attrib, mountFlags)

	diskMounter := &mount.SafeFormatAndMount{Interface: ns.mounter, Exec: mount.NewOsExec()}
	if isBlock {
		options := []string{"bind"}
		if err := diskMounter.Mount(devicePath, targetPath, fsType, options); err != nil {
			return err
		}
	} else {
		options := []string{}
		if readOnly {
			options = append(options, "ro")
		}
		if err := diskMounter.FormatAndMount(devicePath, targetPath, fsType, options); err != nil {
			return err
		}
	}

	return nil
}
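
// createTargetPath makes sure the target path exists: an empty file is
// created for block volumes, a directory otherwise. It returns whether the
// path is not currently a mount point.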
func (ns *NodeServer) createTargetPath(targetPath string, isBlock bool) (bool, error) {
	// Check if that target path exists properly
	notMnt, err := mount.IsNotMountPoint(ns.mounter, targetPath)
	if err != nil {
		if os.IsNotExist(err) {
			if isBlock {
				// create an empty file
				// #nosec
				targetPathFile, e := os.OpenFile(targetPath, os.O_CREATE|os.O_RDWR, 0750)
				if e != nil {
					klog.V(4).Infof("Failed to create targetPath:%s with error: %v", targetPath, e)
					return notMnt, status.Error(codes.Internal, e.Error())
				}
				if err = targetPathFile.Close(); err != nil {
					klog.V(4).Infof("Failed to close targetPath:%s with error: %v", targetPath, err)
					return notMnt, status.Error(codes.Internal, err.Error())
				}
			} else {
				// Create a directory
				if err = os.MkdirAll(targetPath, 0750); err != nil {
					return notMnt, status.Error(codes.Internal, err.Error())
				}
			}
			notMnt = true
		} else {
			return false, status.Error(codes.Internal, err.Error())
		}
	}
	return notMnt, err
}

// NodeUnpublishVolume unmounts the volume from the target path
func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) {
	targetPath := req.GetTargetPath()
	if targetPath == "" {
		return nil, status.Error(codes.InvalidArgument, "empty target path in request")
	}

	if req.GetVolumeId() == "" {
		return nil, status.Error(codes.InvalidArgument, "empty volume ID in request")
	}

	idLk := targetPathLocker.Lock(targetPath)
	defer targetPathLocker.Unlock(idLk, targetPath)

	notMnt, err := mount.IsNotMountPoint(ns.mounter, targetPath)
	if err != nil {
		if os.IsNotExist(err) {
			// targetPath has already been deleted
			klog.V(4).Infof("targetPath: %s has already been deleted", targetPath)
			return &csi.NodeUnpublishVolumeResponse{}, nil
		}
		return nil, status.Error(codes.NotFound, err.Error())
	}

	if notMnt {
		// TODO should consider deleting path instead of returning error,
		// once all codes become ready for csi 1.0.
		return nil, status.Error(codes.NotFound, "volume not mounted")
	}

	devicePath, cnt, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
	if err != nil {
		return nil, status.Error(codes.Internal, err.Error())
	}

	if err = ns.unmount(targetPath, devicePath, cnt); err != nil {
		return nil, err
	}

	return &csi.NodeUnpublishVolumeResponse{}, nil
}
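
// unmount unmounts the target path, unmaps the rbd device once the last
// mount reference is gone, and finally removes the target path.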
func (ns *NodeServer) unmount(targetPath, devicePath string, cnt int) error {
	var err error
	// Bind mounted device needs to be resolved by using resolveBindMountedBlockDevice
	if devicePath == "devtmpfs" {
		devicePath, err = resolveBindMountedBlockDevice(targetPath)
		if err != nil {
			return status.Error(codes.Internal, err.Error())
		}
		klog.V(4).Infof("NodeUnpublishVolume: devicePath: %s, (original)cnt: %d\n", devicePath, cnt)
		// cnt from GetDeviceNameFromMount is broken for a bind mounted device:
		// it counts the total number of mounted "devtmpfs" instances instead of
		// counting this device.
		// So, forcibly setting cnt to 1 here.
		// TODO: fix this properly
		cnt = 1
	}

	klog.V(4).Infof("NodeUnpublishVolume: targetPath: %s, devicePath: %s\n", targetPath, devicePath)

	// Unmounting the image
	err = ns.mounter.Unmount(targetPath)
	if err != nil {
		klog.V(3).Infof("failed to unmount targetPath: %s with error: %v", targetPath, err)
		return status.Error(codes.Internal, err.Error())
	}

	cnt--
	if cnt != 0 {
		// TODO should this be fixed not to succeed, so that the driver can retry unmounting?
		return nil
	}

	// Unmapping rbd device
	if err = detachRBDDevice(devicePath); err != nil {
		klog.V(3).Infof("failed to unmap rbd device: %s with error: %v", devicePath, err)
		return err
	}

	// Remove targetPath
	if err = os.RemoveAll(targetPath); err != nil {
		klog.V(3).Infof("failed to remove targetPath: %s with error: %v", targetPath, err)
	}

	return err
}
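
// resolveBindMountedBlockDevice uses findmnt to look up the SOURCE of the
// given mount path, so a bind mounted block device can be resolved back to
// its /dev path.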
func resolveBindMountedBlockDevice(mountPath string) (string, error) {
	// #nosec
	cmd := exec.Command("findmnt", "-n", "-o", "SOURCE", "--first-only", "--target", mountPath)
	out, err := cmd.CombinedOutput()
	if err != nil {
		klog.V(2).Infof("Failed findmnt command for path %s: %s %v", mountPath, out, err)
		return "", err
	}
	return parseFindMntResolveSource(string(out))
}

// parse output of "findmnt -o SOURCE --first-only --target" and return just the SOURCE
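// Typical SOURCE values are an absolute device path such as "/dev/rbd0" for a
// filesystem mount, or a bind mounted block device reported in the form
// "devtmpfs[/rbd0]", which is resolved below to "/dev/rbd0".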
func parseFindMntResolveSource(out string) (string, error) {
	// cut trailing newline
	out = strings.TrimSuffix(out, "\n")
	// Check if out is a mounted device
	reMnt := regexp.MustCompile("^(/[^/]+(?:/[^/]*)*)$")
	if match := reMnt.FindStringSubmatch(out); match != nil {
		return match[1], nil
	}
	// Check if out is a block device
	// nolint
	reBlk := regexp.MustCompile("^devtmpfs\\[(/[^/]+(?:/[^/]*)*)\\]$")
	if match := reBlk.FindStringSubmatch(out); match != nil {
		return fmt.Sprintf("/dev%s", match[1]), nil
	}
	return "", fmt.Errorf("parseFindMntResolveSource: %s doesn't match to any expected findMnt output", out)
}