mirror of
https://github.com/ceph/ceph-csi.git
synced 2024-12-18 02:50:30 +00:00
util: add support for the nsenter
add support to run rbd map and mount -t commands with the nsenter. complete design of pod/multus network is added here https://github.com/rook/rook/blob/master/design/ceph/multus-network.md#csi-pods Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
This commit is contained in:
parent
6333c4b1e6
commit
7b2aef0d81
@ -31,6 +31,7 @@ spec:
|
|||||||
priorityClassName: {{ .Values.nodeplugin.priorityClassName }}
|
priorityClassName: {{ .Values.nodeplugin.priorityClassName }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
hostNetwork: true
|
hostNetwork: true
|
||||||
|
hostPID: true
|
||||||
# to use e.g. Rook orchestrated cluster, and mons' FQDN is
|
# to use e.g. Rook orchestrated cluster, and mons' FQDN is
|
||||||
# resolved through k8s service, set dns policy to cluster first
|
# resolved through k8s service, set dns policy to cluster first
|
||||||
dnsPolicy: ClusterFirstWithHostNet
|
dnsPolicy: ClusterFirstWithHostNet
|
||||||
|
@ -27,6 +27,7 @@ serviceAccounts:
|
|||||||
# - "<MONValue2>"
|
# - "<MONValue2>"
|
||||||
# cephFS:
|
# cephFS:
|
||||||
# subvolumeGroup: "csi"
|
# subvolumeGroup: "csi"
|
||||||
|
# netNamespaceFilePath: "{{ .kubeletDir }}/plugins/{{ .driverName }}/net"
|
||||||
csiConfig: []
|
csiConfig: []
|
||||||
|
|
||||||
# Set logging level for csi containers.
|
# Set logging level for csi containers.
|
||||||
|
@ -25,6 +25,7 @@ serviceAccounts:
|
|||||||
# monitors:
|
# monitors:
|
||||||
# - "<MONValue1>"
|
# - "<MONValue1>"
|
||||||
# - "<MONValue2>"
|
# - "<MONValue2>"
|
||||||
|
# netNamespaceFilePath: "{{ .kubeletDir }}/plugins/{{ .driverName }}/net"
|
||||||
csiConfig: []
|
csiConfig: []
|
||||||
|
|
||||||
# Configuration details of clusterID,PoolID and FscID mapping
|
# Configuration details of clusterID,PoolID and FscID mapping
|
||||||
|
@ -15,6 +15,7 @@ spec:
|
|||||||
serviceAccountName: cephfs-csi-nodeplugin
|
serviceAccountName: cephfs-csi-nodeplugin
|
||||||
priorityClassName: system-node-critical
|
priorityClassName: system-node-critical
|
||||||
hostNetwork: true
|
hostNetwork: true
|
||||||
|
hostPID: true
|
||||||
# to use e.g. Rook orchestrated cluster, and mons' FQDN is
|
# to use e.g. Rook orchestrated cluster, and mons' FQDN is
|
||||||
# resolved through k8s service, set dns policy to cluster first
|
# resolved through k8s service, set dns policy to cluster first
|
||||||
dnsPolicy: ClusterFirstWithHostNet
|
dnsPolicy: ClusterFirstWithHostNet
|
||||||
|
@ -50,6 +50,42 @@ kubectl replace -f ./csi-config-map-sample.yaml
|
|||||||
Storage class and snapshot class, using `<cluster-id>` as the value for the
|
Storage class and snapshot class, using `<cluster-id>` as the value for the
|
||||||
option `clusterID`, can now be created on the cluster.
|
option `clusterID`, can now be created on the cluster.
|
||||||
|
|
||||||
|
## Running CephCSI with pod networking
|
||||||
|
|
||||||
|
The current problem with pod networking is that when a CephFS/RBD volume is mounted
|
||||||
|
in a pod using Ceph CSI and then the CSI CephFS/RBD plugin is restarted or
|
||||||
|
terminated (e.g. by restarting or deleting its DaemonSet), all operations on
|
||||||
|
the volume become blocked, even after restarting the CSI pods.
|
||||||
|
|
||||||
|
The only workaround is to restart the node where the Ceph CSI plugin pod was
|
||||||
|
restarted. This can be mitigated by running the `rbd map`/`mount -t` commands
|
||||||
|
in a different network namespace which does not get deleted when the CSI
|
||||||
|
CephFS/RBD plugin is restarted or terminated.
|
||||||
|
|
||||||
|
If someone wants to run CephCSI with pod networking, they can still do so
|
||||||
|
by setting the `netNamespaceFilePath`. If this path is set CephCSI will execute
|
||||||
|
the `rbd map`/`mount -t` commands after entering the [network
|
||||||
|
namespace](https://man7.org/linux/man-pages/man7/network_namespaces.7.html)
|
||||||
|
specified by `netNamespaceFilePath` with the
|
||||||
|
[nsenter](https://man7.org/linux/man-pages/man1/nsenter.1.html) command.
|
||||||
|
|
||||||
|
`netNamespaceFilePath` should point to the network namespace of some
|
||||||
|
long-running process; typically it would be a symlink to
|
||||||
|
`/proc/<long running process id>/ns/net`.
|
||||||
|
|
||||||
|
The long-running process can also be another pod, managed by a DaemonSet, which
|
||||||
|
never restarts. This Pod should only be stopped and restarted when a node is
|
||||||
|
stopped so that volume operations do not become blocked. The new DaemonSet pod
|
||||||
|
can contain a single container, responsible for holding its pod network alive.
|
||||||
|
It is used as a passthrough by the CephCSI plugin pod which when mounting or
|
||||||
|
mapping will use the network namespace of this pod.
|
||||||
|
|
||||||
|
Once the pod is created, get its PID and create a symlink to
|
||||||
|
`/proc/<PID>/ns/net` in the hostPath volume shared with the csi-plugin pod and
|
||||||
|
specify the path in the `netNamespaceFilePath` option.
|
||||||
|
|
||||||
|
*Note* This Pod should have `hostPID: true` in the Pod Spec.
|
||||||
|
|
||||||
## Deploying the storage class
|
## Deploying the storage class
|
||||||
|
|
||||||
Once the plugin is successfully deployed, you'll need to customize
|
Once the plugin is successfully deployed, you'll need to customize
|
||||||
|
@ -20,6 +20,12 @@ kind: ConfigMap
|
|||||||
# NOTE: Make sure you don't add radosNamespace option to a currently in use
|
# NOTE: Make sure you don't add radosNamespace option to a currently in use
|
||||||
# configuration as it will cause issues.
|
# configuration as it will cause issues.
|
||||||
# The field "cephFS.subvolumeGroup" is optional and defaults to "csi".
|
# The field "cephFS.subvolumeGroup" is optional and defaults to "csi".
|
||||||
|
# The <netNamespaceFilePath#> fields are the various network namespace
|
||||||
|
# paths for the Ceph cluster identified by the <cluster-id>. They will be used
|
||||||
|
# by the CSI plugin to execute the rbd map/unmap and mount -t commands in the
|
||||||
|
# network namespace specified by the <netNamespaceFilePath#>.
|
||||||
|
# If a CSI plugin is using more than one Ceph cluster, repeat the section for
|
||||||
|
# each such cluster in use.
|
||||||
# NOTE: Changes to the configmap is automatically updated in the running pods,
|
# NOTE: Changes to the configmap is automatically updated in the running pods,
|
||||||
# thus restarting existing pods using the configmap is NOT required on edits
|
# thus restarting existing pods using the configmap is NOT required on edits
|
||||||
# to the configmap.
|
# to the configmap.
|
||||||
@ -37,6 +43,7 @@ data:
|
|||||||
{
|
{
|
||||||
"clusterID": "<cluster-id>",
|
"clusterID": "<cluster-id>",
|
||||||
"radosNamespace": "<rados-namespace>",
|
"radosNamespace": "<rados-namespace>",
|
||||||
|
"netNamespaceFilePath": "<kubeletRootPath>/plugins/rbd.csi.ceph.com/net",
|
||||||
"monitors": [
|
"monitors": [
|
||||||
"<MONValue1>",
|
"<MONValue1>",
|
||||||
"<MONValue2>",
|
"<MONValue2>",
|
||||||
|
@ -65,12 +65,20 @@ func mountFuse(ctx context.Context, mountPoint string, cr *util.Credentials, vol
|
|||||||
if volOptions.FsName != "" {
|
if volOptions.FsName != "" {
|
||||||
args = append(args, "--client_mds_namespace="+volOptions.FsName)
|
args = append(args, "--client_mds_namespace="+volOptions.FsName)
|
||||||
}
|
}
|
||||||
|
var (
|
||||||
|
stderr string
|
||||||
|
err error
|
||||||
|
)
|
||||||
|
|
||||||
|
if volOptions.NetNamespaceFilePath != "" {
|
||||||
|
_, stderr, err = util.ExecuteCommandWithNSEnter(ctx, volOptions.NetNamespaceFilePath, "ceph-fuse", args[:]...)
|
||||||
|
} else {
|
||||||
|
_, stderr, err = util.ExecCommand(ctx, "ceph-fuse", args[:]...)
|
||||||
|
}
|
||||||
|
|
||||||
_, stderr, err := util.ExecCommand(ctx, "ceph-fuse", args[:]...)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("%w stderr: %s", err, stderr)
|
return fmt.Errorf("%w stderr: %s", err, stderr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse the output:
|
// Parse the output:
|
||||||
// We need "starting fuse" meaning the mount is ok
|
// We need "starting fuse" meaning the mount is ok
|
||||||
// and PID of the ceph-fuse daemon for unmount
|
// and PID of the ceph-fuse daemon for unmount
|
||||||
|
@ -51,7 +51,16 @@ func mountKernel(ctx context.Context, mountPoint string, cr *util.Credentials, v
|
|||||||
|
|
||||||
args = append(args, "-o", optionsStr)
|
args = append(args, "-o", optionsStr)
|
||||||
|
|
||||||
_, stderr, err := util.ExecCommand(ctx, "mount", args[:]...)
|
var (
|
||||||
|
stderr string
|
||||||
|
err error
|
||||||
|
)
|
||||||
|
|
||||||
|
if volOptions.NetNamespaceFilePath != "" {
|
||||||
|
_, stderr, err = util.ExecuteCommandWithNSEnter(ctx, volOptions.NetNamespaceFilePath, "mount", args[:]...)
|
||||||
|
} else {
|
||||||
|
_, stderr, err = util.ExecCommand(ctx, "mount", args[:]...)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("%w stderr: %s", err, stderr)
|
return fmt.Errorf("%w stderr: %s", err, stderr)
|
||||||
}
|
}
|
||||||
|
@ -126,6 +126,11 @@ func (ns *NodeServer) NodeStageVolume(
|
|||||||
}
|
}
|
||||||
defer volOptions.Destroy()
|
defer volOptions.Destroy()
|
||||||
|
|
||||||
|
volOptions.NetNamespaceFilePath, err = util.GetNetNamespaceFilePath(util.CsiConfigFile, volOptions.ClusterID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, status.Error(codes.Internal, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
mnt, err := mounter.New(volOptions)
|
mnt, err := mounter.New(volOptions)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.ErrorLog(ctx, "failed to create mounter for volume %s: %v", volID, err)
|
log.ErrorLog(ctx, "failed to create mounter for volume %s: %v", volID, err)
|
||||||
|
@ -50,6 +50,8 @@ type VolumeOptions struct {
|
|||||||
ProvisionVolume bool `json:"provisionVolume"`
|
ProvisionVolume bool `json:"provisionVolume"`
|
||||||
KernelMountOptions string `json:"kernelMountOptions"`
|
KernelMountOptions string `json:"kernelMountOptions"`
|
||||||
FuseMountOptions string `json:"fuseMountOptions"`
|
FuseMountOptions string `json:"fuseMountOptions"`
|
||||||
|
// Network namespace file path to execute nsenter command
|
||||||
|
NetNamespaceFilePath string
|
||||||
|
|
||||||
// conn is a connection to the Ceph cluster obtained from a ConnPool
|
// conn is a connection to the Ceph cluster obtained from a ConnPool
|
||||||
conn *util.ClusterConnection
|
conn *util.ClusterConnection
|
||||||
|
@ -332,6 +332,10 @@ func (ns *NodeServer) NodeStageVolume(
|
|||||||
}
|
}
|
||||||
defer rv.Destroy()
|
defer rv.Destroy()
|
||||||
|
|
||||||
|
rv.NetNamespaceFilePath, err = util.GetNetNamespaceFilePath(util.CsiConfigFile, rv.ClusterID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, status.Error(codes.Internal, err.Error())
|
||||||
|
}
|
||||||
if isHealer {
|
if isHealer {
|
||||||
err = healerStageTransaction(ctx, cr, rv, stagingParentPath)
|
err = healerStageTransaction(ctx, cr, rv, stagingParentPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -457,8 +457,17 @@ func createPath(ctx context.Context, volOpt *rbdVolume, device string, cr *util.
|
|||||||
mapArgs = append(mapArgs, "--read-only")
|
mapArgs = append(mapArgs, "--read-only")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute map
|
var (
|
||||||
stdout, stderr, err := util.ExecCommand(ctx, cli, mapArgs...)
|
stdout string
|
||||||
|
stderr string
|
||||||
|
err error
|
||||||
|
)
|
||||||
|
|
||||||
|
if volOpt.NetNamespaceFilePath != "" {
|
||||||
|
stdout, stderr, err = util.ExecuteCommandWithNSEnter(ctx, volOpt.NetNamespaceFilePath, cli, mapArgs...)
|
||||||
|
} else {
|
||||||
|
stdout, stderr, err = util.ExecCommand(ctx, cli, mapArgs...)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.WarningLog(ctx, "rbd: map error %v, rbd output: %s", err, stderr)
|
log.WarningLog(ctx, "rbd: map error %v, rbd output: %s", err, stderr)
|
||||||
// unmap rbd image if connection timeout
|
// unmap rbd image if connection timeout
|
||||||
|
@ -156,6 +156,8 @@ type rbdVolume struct {
|
|||||||
LogStrategy string
|
LogStrategy string
|
||||||
VolName string
|
VolName string
|
||||||
MonValueFromSecret string
|
MonValueFromSecret string
|
||||||
|
// Network namespace file path to execute nsenter command
|
||||||
|
NetNamespaceFilePath string
|
||||||
// RequestedVolSize has the size of the volume requested by the user and
|
// RequestedVolSize has the size of the volume requested by the user and
|
||||||
// this value will not be updated when doing getImageInfo() on rbdVolume.
|
// this value will not be updated when doing getImageInfo() on rbdVolume.
|
||||||
RequestedVolSize int64
|
RequestedVolSize int64
|
||||||
|
@ -21,6 +21,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -32,6 +33,47 @@ import (
|
|||||||
// InvalidPoolID used to denote an invalid pool.
|
// InvalidPoolID used to denote an invalid pool.
|
||||||
const InvalidPoolID int64 = -1
|
const InvalidPoolID int64 = -1
|
||||||
|
|
||||||
|
// ExecuteCommandWithNSEnter executes passed in program with args with nsenter
|
||||||
|
// and returns separate stdout and stderr streams. In case ctx is not set to
|
||||||
|
// context.TODO(), the command will be logged after it was executed.
|
||||||
|
func ExecuteCommandWithNSEnter(ctx context.Context, netPath, program string, args ...string) (string, string, error) {
|
||||||
|
var (
|
||||||
|
sanitizedArgs = StripSecretInArgs(args)
|
||||||
|
stdoutBuf bytes.Buffer
|
||||||
|
stderrBuf bytes.Buffer
|
||||||
|
)
|
||||||
|
|
||||||
|
// check netPath exists
|
||||||
|
if _, err := os.Stat(netPath); err != nil {
|
||||||
|
return "", "", fmt.Errorf("failed to get stat for %s %w", netPath, err)
|
||||||
|
}
|
||||||
|
// nsenter --net=%s -- <program> <args>
|
||||||
|
args = append([]string{fmt.Sprintf("--net=%s", netPath), "--", program}, args...)
|
||||||
|
cmd := exec.Command("nsenter", args...) // #nosec:G204, commands executing not vulnerable.
|
||||||
|
|
||||||
|
cmd.Stdout = &stdoutBuf
|
||||||
|
cmd.Stderr = &stderrBuf
|
||||||
|
|
||||||
|
err := cmd.Run()
|
||||||
|
stdout := stdoutBuf.String()
|
||||||
|
stderr := stderrBuf.String()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("an error (%w) occurred while running %s args: %v", err, program, sanitizedArgs)
|
||||||
|
if ctx != context.TODO() {
|
||||||
|
log.UsefulLog(ctx, "%s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return stdout, stderr, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if ctx != context.TODO() {
|
||||||
|
log.UsefulLog(ctx, "command succeeded: %s %v", program, sanitizedArgs)
|
||||||
|
}
|
||||||
|
|
||||||
|
return stdout, stderr, nil
|
||||||
|
}
|
||||||
|
|
||||||
// ExecCommand executes passed in program with args and returns separate stdout
|
// ExecCommand executes passed in program with args and returns separate stdout
|
||||||
// and stderr streams. In case ctx is not set to context.TODO(), the command
|
// and stderr streams. In case ctx is not set to context.TODO(), the command
|
||||||
// will be logged after it was executed.
|
// will be logged after it was executed.
|
||||||
|
@ -49,6 +49,8 @@ type ClusterInfo struct {
|
|||||||
// SubvolumeGroup contains the name of the SubvolumeGroup for CSI volumes
|
// SubvolumeGroup contains the name of the SubvolumeGroup for CSI volumes
|
||||||
SubvolumeGroup string `json:"subvolumeGroup"`
|
SubvolumeGroup string `json:"subvolumeGroup"`
|
||||||
} `json:"cephFS"`
|
} `json:"cephFS"`
|
||||||
|
// symlink filepath for the network namespace where we need to execute commands.
|
||||||
|
NetNamespaceFilePath string `json:"netNamespaceFilePath"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Expected JSON structure in the passed in config file is,
|
// Expected JSON structure in the passed in config file is,
|
||||||
@ -161,3 +163,12 @@ func GetClusterID(options map[string]string) (string, error) {
|
|||||||
|
|
||||||
return clusterID, nil
|
return clusterID, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetNetNamespaceFilePath(pathToConfig, clusterID string) (string, error) {
|
||||||
|
cluster, err := readClusterInfo(pathToConfig, clusterID)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return cluster.NetNamespaceFilePath, nil
|
||||||
|
}
|
||||||
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||||||
package util
|
package util
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
@ -138,3 +139,69 @@ func TestCSIConfig(t *testing.T) {
|
|||||||
t.Errorf("Test setup error %s", err)
|
t.Errorf("Test setup error %s", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetNetNamespaceFilePath(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
clusterID string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "get NetNamespaceFilePath for cluster-1",
|
||||||
|
clusterID: "cluster-1",
|
||||||
|
want: "/var/lib/kubelet/plugins/rbd.ceph.csi.com/cluster1-net",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "get NetNamespaceFilePath for cluster-2",
|
||||||
|
clusterID: "cluster-2",
|
||||||
|
want: "/var/lib/kubelet/plugins/rbd.ceph.csi.com/cluster2-net",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "when NetNamespaceFilePath is empty",
|
||||||
|
clusterID: "cluster-3",
|
||||||
|
want: "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
csiConfig := []ClusterInfo{
|
||||||
|
{
|
||||||
|
ClusterID: "cluster-1",
|
||||||
|
Monitors: []string{"ip-1", "ip-2"},
|
||||||
|
NetNamespaceFilePath: "/var/lib/kubelet/plugins/rbd.ceph.csi.com/cluster1-net",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ClusterID: "cluster-2",
|
||||||
|
Monitors: []string{"ip-3", "ip-4"},
|
||||||
|
NetNamespaceFilePath: "/var/lib/kubelet/plugins/rbd.ceph.csi.com/cluster2-net",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ClusterID: "cluster-3",
|
||||||
|
Monitors: []string{"ip-5", "ip-6"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
csiConfigFileContent, err := json.Marshal(csiConfig)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("failed to marshal csi config info %v", err)
|
||||||
|
}
|
||||||
|
tmpConfPath := t.TempDir() + "/ceph-csi.json"
|
||||||
|
err = os.WriteFile(tmpConfPath, csiConfigFileContent, 0o600)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("failed to write %s file content: %v", CsiConfigFile, err)
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
ts := tt
|
||||||
|
t.Run(ts.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
got, err := GetNetNamespaceFilePath(tmpConfPath, ts.clusterID)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("GetNetNamespaceFilePath() error = %v", err)
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if got != ts.want {
|
||||||
|
t.Errorf("GetNetNamespaceFilePath() = %v, want %v", got, ts.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user