2018-09-17 18:12:22 +00:00
|
|
|
/*
|
2019-04-03 08:46:15 +00:00
|
|
|
Copyright 2018 The Ceph-CSI Authors.
|
2018-09-17 18:12:22 +00:00
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package rbd
|
|
|
|
|
|
|
|
import (
|
2019-08-22 16:57:23 +00:00
|
|
|
"context"
|
2019-08-01 21:42:33 +00:00
|
|
|
"encoding/json"
|
2020-06-11 08:04:32 +00:00
|
|
|
"errors"
|
2018-09-17 18:12:22 +00:00
|
|
|
"fmt"
|
2020-01-16 13:35:21 +00:00
|
|
|
"os"
|
2018-09-17 18:12:22 +00:00
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
2020-04-17 09:23:49 +00:00
|
|
|
"github.com/ceph/ceph-csi/internal/util"
|
2021-08-24 15:03:25 +00:00
|
|
|
"github.com/ceph/ceph-csi/internal/util/log"
|
2019-06-01 21:26:42 +00:00
|
|
|
|
rbd: provide a way to supply mounter specific mapOptions from sc
Uses the below schema to supply mounter specific map/unmapOptions to the
nodeplugin based on the discussion we all had at
https://github.com/ceph/ceph-csi/pull/2636
This should specifically be really helpful with the `tryOthermonters`
set to true, i.e with fallback mechanism settings turned ON.
mapOption: "kbrd:v1,v2,v3;nbd:v1,v2,v3"
- By omitting `krbd:` or `nbd:`, the option(s) apply to
rbdDefaultMounter which is krbd.
- A user can _override_ the options for a mounter by specifying `krbd:`
or `nbd:`.
mapOption: "v1,v2,v3;nbd:v1,v2,v3"
is effectively the same as the 1st example.
- Sections are split by `;`.
- If users want to specify common options for both `krbd` and `nbd`,
they should mention them twice.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-11-16 13:10:11 +00:00
|
|
|
"github.com/container-storage-interface/spec/lib/go/csi"
|
2018-09-17 18:12:22 +00:00
|
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// rbdTonbd is the mounter name selecting rbd-nbd (compared against
	// volOptions.Mounter) and is also the rbd-nbd executable name.
	rbdTonbd = "rbd-nbd"
	// moduleNbd is the kernel module backing nbd devices.
	moduleNbd = "nbd"

	// accessTypeKRbd / accessTypeNbd are the values passed to
	// "rbd ... --device-type" to select the kernel rbd or nbd driver.
	accessTypeKRbd = "krbd"
	accessTypeNbd  = "nbd"

	// rbd is the name of the rbd CLI executable.
	rbd = "rbd"

	// Output strings returned during invocation of "rbd unmap --device-type... <imageSpec>" when
	// image is not found to be mapped. Used to ignore errors when attempting to unmap such images.
	// The %s format specifier should contain the <imageSpec> string
	// NOTE: When using devicePath instead of imageSpec, the error strings are different.
	rbdUnmapCmdkRbdMissingMap = "rbd: %s: not a mapped image or snapshot"
	rbdUnmapCmdNbdMissingMap  = "rbd-nbd: %s is not mapped"
	// rbdMapConnectionTimeout is the output substring indicating that a map
	// attempt timed out connecting to the cluster.
	rbdMapConnectionTimeout = "Connection timed out"

	// defaultNbdReAttachTimeout is the default for setNbdReattach (below).
	defaultNbdReAttachTimeout = 300 /* in seconds */
	// defaultNbdIOTimeout is the default for setNbdIOTimeout (below).
	defaultNbdIOTimeout = 0 /* do not abort the requests */

	// The default way of creating nbd devices via rbd-nbd is through the
	// legacy ioctl interface, to take advantage of netlink features we
	// should specify `try-netlink` flag explicitly.
	useNbdNetlink = "try-netlink"

	// `reattach-timeout` of rbd-nbd is to tweak NBD_ATTR_DEAD_CONN_TIMEOUT.
	// It specifies how long the device should be held waiting for the
	// userspace process to come back to life.
	setNbdReattach = "reattach-timeout"

	// `io-timeout` of rbd-nbd is to tweak NBD_ATTR_TIMEOUT. It specifies
	// how long the IO should wait to get handled before bailing out.
	setNbdIOTimeout = "io-timeout"
)
|
|
|
|
|
2021-09-29 14:13:16 +00:00
|
|
|
var (
	// hasNBD records whether the nbd kernel module and the rbd-nbd tool are
	// usable on this node; assumed true until setRbdNbdToolFeatures (run
	// from init) proves otherwise.
	hasNBD = true
	// hasNBDCookieSupport records whether the local rbd-nbd binary accepts
	// the "--cookie" option; detected by setRbdNbdToolFeatures.
	hasNBDCookieSupport = false
)
|
2019-08-01 21:42:33 +00:00
|
|
|
|
2018-09-17 18:12:22 +00:00
|
|
|
func init() {
	// Probe the node once at package load time for nbd availability and
	// rbd-nbd features (fills the hasNBD/hasNBDCookieSupport globals).
	setRbdNbdToolFeatures()
}
|
|
|
|
|
2020-07-19 12:21:03 +00:00
|
|
|
// rbdDeviceInfo strongly typed JSON spec for rbd device list output (of type krbd).
type rbdDeviceInfo struct {
	ID             string `json:"id"`        // device identifier
	Pool           string `json:"pool"`      // pool backing the mapped image
	RadosNamespace string `json:"namespace"` // rados namespace ("" when unset)
	Name           string `json:"name"`      // image name
	Device         string `json:"device"`    // local device path of the mapping
}
|
|
|
|
|
2019-08-01 21:42:33 +00:00
|
|
|
// nbdDeviceInfo strongly typed JSON spec for rbd-nbd device list output (of type nbd)
// NOTE: There is a bug in rbd output that returns id as number for nbd, and string for krbd, thus
// requiring 2 different JSON structures to unmarshal the output.
// NOTE: image key is "name" in krbd output and "image" in nbd output, which is another difference.
type nbdDeviceInfo struct {
	ID             int64  `json:"id"`        // device identifier (numeric for nbd, see NOTE above)
	Pool           string `json:"pool"`      // pool backing the mapped image
	RadosNamespace string `json:"namespace"` // rados namespace ("" when unset)
	Name           string `json:"image"`     // image name (key differs from krbd, see NOTE above)
	Device         string `json:"device"`    // local device path of the mapping
}
|
2018-09-17 18:12:22 +00:00
|
|
|
|
2021-08-23 11:23:15 +00:00
|
|
|
// detachRBDImageArgs bundles the arguments for unmapping/detaching an rbd
// image (presumably consumed by the detach/unmap helpers elsewhere in this
// file — confirm at call sites).
type detachRBDImageArgs struct {
	imageOrDeviceSpec string // image spec (pool/{namespace/}image) or device path, see isImageSpec
	isImageSpec       bool   // true: imageOrDeviceSpec is an image spec; false: a device path
	isNbd             bool   // unmap an rbd-nbd (userspace) mapping instead of krbd
	encrypted         bool   // whether the volume is encrypted
	volumeID          string // CSI volume ID of the image being detached
	unmapOptions      string // extra options forwarded to the unmap command
	logDir            string // directory holding rbd-nbd log files
	logStrategy       string // handling of the rbd-nbd log file on detach
}
|
|
|
|
|
2019-08-01 21:42:33 +00:00
|
|
|
// rbdGetDeviceList queries rbd about mapped devices and returns a list of rbdDeviceInfo
|
2020-07-19 12:21:03 +00:00
|
|
|
// It will selectively list devices mapped using krbd or nbd as specified by accessType.
|
2020-07-22 12:11:41 +00:00
|
|
|
func rbdGetDeviceList(ctx context.Context, accessType string) ([]rbdDeviceInfo, error) {
|
2019-08-01 21:42:33 +00:00
|
|
|
// rbd device list --format json --device-type [krbd|nbd]
|
|
|
|
var (
|
|
|
|
rbdDeviceList []rbdDeviceInfo
|
|
|
|
nbdDeviceList []nbdDeviceInfo
|
|
|
|
)
|
2018-09-17 18:12:22 +00:00
|
|
|
|
2020-07-22 12:11:41 +00:00
|
|
|
stdout, _, err := util.ExecCommand(ctx, rbd, "device", "list", "--format="+"json", "--device-type", accessType)
|
2018-09-17 18:12:22 +00:00
|
|
|
if err != nil {
|
2020-12-08 14:05:59 +00:00
|
|
|
return nil, fmt.Errorf("error getting device list from rbd for devices of type (%s): %w", accessType, err)
|
2018-09-17 18:12:22 +00:00
|
|
|
}
|
|
|
|
|
2019-08-01 21:42:33 +00:00
|
|
|
if accessType == accessTypeKRbd {
|
2020-07-22 12:53:22 +00:00
|
|
|
err = json.Unmarshal([]byte(stdout), &rbdDeviceList)
|
2019-08-01 21:42:33 +00:00
|
|
|
} else {
|
2020-07-22 12:53:22 +00:00
|
|
|
err = json.Unmarshal([]byte(stdout), &nbdDeviceList)
|
2018-09-17 18:12:22 +00:00
|
|
|
}
|
2019-08-01 21:42:33 +00:00
|
|
|
if err != nil {
|
2021-06-25 11:52:34 +00:00
|
|
|
return nil, fmt.Errorf(
|
|
|
|
"error to parse JSON output of device list for devices of type (%s): %w",
|
|
|
|
accessType,
|
|
|
|
err)
|
2018-09-17 18:12:22 +00:00
|
|
|
}
|
|
|
|
|
2019-08-01 21:42:33 +00:00
|
|
|
// convert output to a rbdDeviceInfo list for consumers
|
|
|
|
if accessType == accessTypeNbd {
|
|
|
|
for _, device := range nbdDeviceList {
|
|
|
|
rbdDeviceList = append(
|
|
|
|
rbdDeviceList,
|
|
|
|
rbdDeviceInfo{
|
2020-06-01 13:57:51 +00:00
|
|
|
ID: strconv.FormatInt(device.ID, 10),
|
|
|
|
Pool: device.Pool,
|
|
|
|
RadosNamespace: device.RadosNamespace,
|
|
|
|
Name: device.Name,
|
|
|
|
Device: device.Device,
|
2019-08-01 21:42:33 +00:00
|
|
|
})
|
2018-09-17 18:12:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-01 21:42:33 +00:00
|
|
|
return rbdDeviceList, nil
|
|
|
|
}
|
2019-01-29 05:49:16 +00:00
|
|
|
|
2020-06-01 13:57:51 +00:00
|
|
|
// findDeviceMappingImage finds a devicePath, if available, based on image spec (pool/{namespace/}image) on the node.
|
|
|
|
func findDeviceMappingImage(ctx context.Context, pool, namespace, image string, useNbdDriver bool) (string, bool) {
|
2019-08-01 21:42:33 +00:00
|
|
|
accessType := accessTypeKRbd
|
|
|
|
if useNbdDriver {
|
|
|
|
accessType = accessTypeNbd
|
2019-01-29 05:49:16 +00:00
|
|
|
}
|
2019-08-01 21:42:33 +00:00
|
|
|
|
2020-06-01 13:57:51 +00:00
|
|
|
imageSpec := fmt.Sprintf("%s/%s", pool, image)
|
|
|
|
if namespace != "" {
|
|
|
|
imageSpec = fmt.Sprintf("%s/%s/%s", pool, namespace, image)
|
|
|
|
}
|
|
|
|
|
2020-07-22 12:11:41 +00:00
|
|
|
rbdDeviceList, err := rbdGetDeviceList(ctx, accessType)
|
2019-01-29 05:49:16 +00:00
|
|
|
if err != nil {
|
2021-08-24 15:03:25 +00:00
|
|
|
log.WarningLog(ctx, "failed to determine if image (%s) is mapped to a device (%v)", imageSpec, err)
|
2021-07-22 05:45:17 +00:00
|
|
|
|
2019-08-01 21:42:33 +00:00
|
|
|
return "", false
|
2019-01-29 05:49:16 +00:00
|
|
|
}
|
|
|
|
|
2019-08-01 21:42:33 +00:00
|
|
|
for _, device := range rbdDeviceList {
|
2020-06-01 13:57:51 +00:00
|
|
|
if device.Name == image && device.Pool == pool && device.RadosNamespace == namespace {
|
2019-08-01 21:42:33 +00:00
|
|
|
return device.Device, true
|
|
|
|
}
|
2019-01-29 05:49:16 +00:00
|
|
|
}
|
2019-08-01 21:42:33 +00:00
|
|
|
|
|
|
|
return "", false
|
2019-01-29 05:49:16 +00:00
|
|
|
}
|
|
|
|
|
2018-09-17 18:12:22 +00:00
|
|
|
// Stat a path, if it doesn't exist, retry maxRetries times.
|
2020-06-01 13:57:51 +00:00
|
|
|
func waitForPath(ctx context.Context, pool, namespace, image string, maxRetries int, useNbdDriver bool) (string, bool) {
|
2018-09-17 18:12:22 +00:00
|
|
|
for i := 0; i < maxRetries; i++ {
|
|
|
|
if i != 0 {
|
|
|
|
time.Sleep(time.Second)
|
|
|
|
}
|
2019-08-01 21:42:33 +00:00
|
|
|
|
2020-06-01 13:57:51 +00:00
|
|
|
device, found := findDeviceMappingImage(ctx, pool, namespace, image, useNbdDriver)
|
2019-08-01 21:42:33 +00:00
|
|
|
if found {
|
|
|
|
return device, found
|
2018-09-17 18:12:22 +00:00
|
|
|
}
|
|
|
|
}
|
2019-08-01 21:42:33 +00:00
|
|
|
|
2018-09-17 18:12:22 +00:00
|
|
|
return "", false
|
|
|
|
}
|
|
|
|
|
2021-09-29 14:13:16 +00:00
|
|
|
// set features available with rbd-nbd, and NBD module loaded status.
|
|
|
|
func setRbdNbdToolFeatures() {
|
2022-02-14 11:18:31 +00:00
|
|
|
var stderr string
|
2020-01-16 13:35:21 +00:00
|
|
|
// check if the module is loaded or compiled in
|
|
|
|
_, err := os.Stat(fmt.Sprintf("/sys/module/%s", moduleNbd))
|
|
|
|
if os.IsNotExist(err) {
|
|
|
|
// try to load the module
|
2022-02-14 11:18:31 +00:00
|
|
|
_, stderr, err = util.ExecCommand(context.TODO(), "modprobe", moduleNbd)
|
2020-01-16 13:35:21 +00:00
|
|
|
if err != nil {
|
2021-09-29 14:13:16 +00:00
|
|
|
hasNBD = false
|
2022-02-14 11:18:31 +00:00
|
|
|
log.WarningLogMsg("rbd-nbd: nbd modprobe failed (%v): %q", err, stderr)
|
2020-01-16 13:35:21 +00:00
|
|
|
}
|
2018-09-17 18:12:22 +00:00
|
|
|
}
|
2021-07-22 05:45:17 +00:00
|
|
|
|
2021-09-29 14:13:16 +00:00
|
|
|
stdout, stderr, err := util.ExecCommand(context.TODO(), rbdTonbd, "--help")
|
|
|
|
if err != nil || stderr != "" {
|
|
|
|
hasNBD = false
|
|
|
|
log.WarningLogMsg("running rbd-nbd --help failed with error:%v, stderr:%s", err, stderr)
|
|
|
|
}
|
|
|
|
|
|
|
|
if strings.Contains(stdout, "--cookie") {
|
|
|
|
hasNBDCookieSupport = true
|
2018-09-17 18:12:22 +00:00
|
|
|
}
|
2021-07-22 05:45:17 +00:00
|
|
|
|
2021-09-29 14:13:16 +00:00
|
|
|
log.DefaultLog("NBD module loaded: %t, rbd-nbd supported features, cookie: %t", hasNBD, hasNBDCookieSupport)
|
2018-09-17 18:12:22 +00:00
|
|
|
}
|
|
|
|
|
rbd: provide a way to supply mounter specific mapOptions from sc
Uses the below schema to supply mounter specific map/unmapOptions to the
nodeplugin based on the discussion we all had at
https://github.com/ceph/ceph-csi/pull/2636
This should specifically be really helpful with the `tryOthermonters`
set to true, i.e with fallback mechanism settings turned ON.
mapOption: "kbrd:v1,v2,v3;nbd:v1,v2,v3"
- By omitting `krbd:` or `nbd:`, the option(s) apply to
rbdDefaultMounter which is krbd.
- A user can _override_ the options for a mounter by specifying `krbd:`
or `nbd:`.
mapOption: "v1,v2,v3;nbd:v1,v2,v3"
is effectively the same as the 1st example.
- Sections are split by `;`.
- If users want to specify common options for both `krbd` and `nbd`,
they should mention them twice.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-11-16 13:10:11 +00:00
|
|
|
// parseMapOptions helps parse formatted mapOptions and unmapOptions and
|
|
|
|
// returns mounter specific options.
|
|
|
|
func parseMapOptions(mapOptions string) (string, string, error) {
|
|
|
|
var krbdMapOptions, nbdMapOptions string
|
|
|
|
const (
|
|
|
|
noKeyLength = 1
|
|
|
|
validLength = 2
|
|
|
|
)
|
|
|
|
for _, item := range strings.Split(mapOptions, ";") {
|
|
|
|
var mounter, options string
|
|
|
|
if item == "" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
s := strings.Split(item, ":")
|
|
|
|
switch len(s) {
|
|
|
|
case noKeyLength:
|
|
|
|
options = strings.TrimSpace(s[0])
|
|
|
|
krbdMapOptions = options
|
|
|
|
case validLength:
|
|
|
|
mounter = strings.TrimSpace(s[0])
|
|
|
|
options = strings.TrimSpace(s[1])
|
|
|
|
switch strings.ToLower(mounter) {
|
|
|
|
case accessTypeKRbd:
|
|
|
|
krbdMapOptions = options
|
|
|
|
case accessTypeNbd:
|
|
|
|
nbdMapOptions = options
|
|
|
|
default:
|
|
|
|
return "", "", fmt.Errorf("unknown mounter type: %q", mounter)
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
return "", "", fmt.Errorf("badly formatted map/unmap options: %q", mapOptions)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return krbdMapOptions, nbdMapOptions, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// getMapOptions is a wrapper func, calls parse map/unmap funcs and feeds the
|
|
|
|
// rbdVolume object.
|
|
|
|
func getMapOptions(req *csi.NodeStageVolumeRequest, rv *rbdVolume) error {
|
|
|
|
krbdMapOptions, nbdMapOptions, err := parseMapOptions(req.GetVolumeContext()["mapOptions"])
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
krbdUnmapOptions, nbdUnmapOptions, err := parseMapOptions(req.GetVolumeContext()["unmapOptions"])
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if rv.Mounter == rbdDefaultMounter {
|
|
|
|
rv.MapOptions = krbdMapOptions
|
|
|
|
rv.UnmapOptions = krbdUnmapOptions
|
|
|
|
} else if rv.Mounter == rbdNbdMounter {
|
|
|
|
rv.MapOptions = nbdMapOptions
|
|
|
|
rv.UnmapOptions = nbdUnmapOptions
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
rbd: add volume healer
Problem:
-------
For rbd nbd userspace mounter backends, after a restart of the nodeplugin
all the mounts will start seeing IO errors. This is because, for rbd-nbd
backends there will be a userspace mount daemon running per volume, post
restart of the nodeplugin pod, there is no way to restore the daemons
back to life.
Solution:
--------
The volume healer is a one-time activity that is triggered at the startup
time of the rbd nodeplugin. It navigates through the list of volume
attachments on the node and acts accordingly.
For now, it is limited to nbd type storage only, but it is flexible and
can be extended in the future for other backend types as needed.
From a few feets above:
This solves a severe problem for nbd backed csi volumes. The healer while
going through the list of volume attachments on the node, if finds the
volume is in attached state and is of type nbd, then it will attempt to
fix the rbd-nbd volumes by sending a NodeStageVolume request with the
required volume attributes like secrets, device name, image attributes,
and etc.. which will finally help start the required rbd-nbd daemons in
the nodeplugin csi-rbdplugin container. This will allow reattaching the
backend images with the right nbd device, thus allowing the applications
to perform IO without any interruptions even after a nodeplugin restart.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-05-31 11:13:54 +00:00
|
|
|
func attachRBDImage(ctx context.Context, volOptions *rbdVolume, device string, cr *util.Credentials) (string, error) {
|
2018-09-17 18:12:22 +00:00
|
|
|
var err error
|
|
|
|
|
2019-04-22 21:35:39 +00:00
|
|
|
image := volOptions.RbdImageName
|
2018-09-18 14:09:12 +00:00
|
|
|
useNBD := false
|
2019-01-17 06:20:33 +00:00
|
|
|
if volOptions.Mounter == rbdTonbd && hasNBD {
|
2018-09-18 14:09:12 +00:00
|
|
|
useNBD = true
|
2018-09-17 18:12:22 +00:00
|
|
|
}
|
2018-09-18 14:09:12 +00:00
|
|
|
|
2020-06-01 13:57:51 +00:00
|
|
|
devicePath, found := waitForPath(ctx, volOptions.Pool, volOptions.RadosNamespace, image, 1, useNBD)
|
2018-09-17 18:12:22 +00:00
|
|
|
if !found {
|
|
|
|
backoff := wait.Backoff{
|
|
|
|
Duration: rbdImageWatcherInitDelay,
|
|
|
|
Factor: rbdImageWatcherFactor,
|
|
|
|
Steps: rbdImageWatcherSteps,
|
|
|
|
}
|
2019-03-14 00:18:04 +00:00
|
|
|
|
2020-07-22 13:33:36 +00:00
|
|
|
err = waitForrbdImage(ctx, backoff, volOptions)
|
2019-01-28 19:55:10 +00:00
|
|
|
|
2018-09-21 14:38:50 +00:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
rbd: add volume healer
Problem:
-------
For rbd nbd userspace mounter backends, after a restart of the nodeplugin
all the mounts will start seeing IO errors. This is because, for rbd-nbd
backends there will be a userspace mount daemon running per volume, post
restart of the nodeplugin pod, there is no way to restore the daemons
back to life.
Solution:
--------
The volume healer is a one-time activity that is triggered at the startup
time of the rbd nodeplugin. It navigates through the list of volume
attachments on the node and acts accordingly.
For now, it is limited to nbd type storage only, but it is flexible and
can be extended in the future for other backend types as needed.
From a few feets above:
This solves a severe problem for nbd backed csi volumes. The healer while
going through the list of volume attachments on the node, if finds the
volume is in attached state and is of type nbd, then it will attempt to
fix the rbd-nbd volumes by sending a NodeStageVolume request with the
required volume attributes like secrets, device name, image attributes,
and etc.. which will finally help start the required rbd-nbd daemons in
the nodeplugin csi-rbdplugin container. This will allow reattaching the
backend images with the right nbd device, thus allowing the applications
to perform IO without any interruptions even after a nodeplugin restart.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-05-31 11:13:54 +00:00
|
|
|
devicePath, err = createPath(ctx, volOptions, device, cr)
|
2019-01-29 05:49:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return devicePath, err
|
|
|
|
}
|
|
|
|
|
2022-01-25 07:37:15 +00:00
|
|
|
func appendNbdDeviceTypeAndOptions(cmdArgs []string, userOptions, cookie string) []string {
|
2021-09-01 18:43:37 +00:00
|
|
|
cmdArgs = append(cmdArgs, "--device-type", accessTypeNbd)
|
2020-08-21 17:00:06 +00:00
|
|
|
|
2021-09-01 18:43:37 +00:00
|
|
|
isUnmap := CheckSliceContains(cmdArgs, "unmap")
|
|
|
|
if !isUnmap {
|
rbd: add volume healer
Problem:
-------
For rbd nbd userspace mounter backends, after a restart of the nodeplugin
all the mounts will start seeing IO errors. This is because, for rbd-nbd
backends there will be a userspace mount daemon running per volume, post
restart of the nodeplugin pod, there is no way to restore the daemons
back to life.
Solution:
--------
The volume healer is a one-time activity that is triggered at the startup
time of the rbd nodeplugin. It navigates through the list of volume
attachments on the node and acts accordingly.
For now, it is limited to nbd type storage only, but it is flexible and
can be extended in the future for other backend types as needed.
From a few feets above:
This solves a severe problem for nbd backed csi volumes. The healer while
going through the list of volume attachments on the node, if finds the
volume is in attached state and is of type nbd, then it will attempt to
fix the rbd-nbd volumes by sending a NodeStageVolume request with the
required volume attributes like secrets, device name, image attributes,
and etc.. which will finally help start the required rbd-nbd daemons in
the nodeplugin csi-rbdplugin container. This will allow reattaching the
backend images with the right nbd device, thus allowing the applications
to perform IO without any interruptions even after a nodeplugin restart.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-05-31 11:13:54 +00:00
|
|
|
if !strings.Contains(userOptions, useNbdNetlink) {
|
|
|
|
cmdArgs = append(cmdArgs, "--options", useNbdNetlink)
|
|
|
|
}
|
|
|
|
if !strings.Contains(userOptions, setNbdReattach) {
|
|
|
|
cmdArgs = append(cmdArgs, "--options", fmt.Sprintf("%s=%d", setNbdReattach, defaultNbdReAttachTimeout))
|
|
|
|
}
|
2021-08-19 06:25:21 +00:00
|
|
|
if !strings.Contains(userOptions, setNbdIOTimeout) {
|
|
|
|
cmdArgs = append(cmdArgs, "--options", fmt.Sprintf("%s=%d", setNbdIOTimeout, defaultNbdIOTimeout))
|
|
|
|
}
|
2021-09-01 18:43:37 +00:00
|
|
|
|
rbd: utilize cookie support from rbd for nbd
Problem:
On remap/attach of device (i.e. nodeplugin restart), there is no way
for rbd-nbd to defend if the backend storage is matching with the initial
backend storage.
Say, if an initial map request for backend "pool1/image1" got mapped to
/dev/nbd0 and the userspace process is terminated (on nodeplugin restart).
A next remap/attach (nodeplugin start) request within reattach-timeout is
allowed to use /dev/nbd0 for a different backend "pool1/image2"
For example, an operation like below could be dangerous:
$ sudo rbd-nbd map --try-netlink rbd-pool/ext4-image
/dev/nbd0
$ sudo blkid /dev/nbd0
/dev/nbd0: UUID="bfc444b4-64b1-418f-8b36-6e0d170cfc04" TYPE="ext4"
$ sudo pkill -15 rbd-nbd <-- nodeplugin terminate
$ sudo rbd-nbd attach --try-netlink --device /dev/nbd0 rbd-pool/xfs-image
/dev/nbd0
$ sudo blkid /dev/nbd0
/dev/nbd0: UUID="d29bf343-6570-4069-a9ea-2fa156ced908" TYPE="xfs"
Solution:
rbd-nbd/kernel now provides a way to keep some metadata in sysfs to identify
between the device and the backend, so that when a remap/attach request is
made, rbd-nbd can compare and avoid such dangerous operations.
With the provided solution, as part of the initial map request, backend
cookie (ceph-csi VOLID) can be stored in the sysfs per device config, so
that on a remap/attach request rbd-nbd will check and validate if the
backend per device cookie matches with the initial map backend with the help
of cookie.
At Ceph-csi we use VOLID as device cookie, which will be unique, we pass
the VOLID as cookie at map and use the same at the time of attach, that
way rbd-nbd can identify backends and their matching devices.
Requires:
https://github.com/ceph/ceph/pull/41323
https://lkml.org/lkml/2021/4/29/274
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-09-21 13:13:10 +00:00
|
|
|
if hasNBDCookieSupport {
|
|
|
|
cmdArgs = append(cmdArgs, "--options", fmt.Sprintf("cookie=%s", cookie))
|
|
|
|
}
|
2021-09-01 18:43:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if userOptions != "" {
|
|
|
|
// userOptions is appended after, possibly overriding the above
|
|
|
|
// default options.
|
|
|
|
cmdArgs = append(cmdArgs, "--options", userOptions)
|
2020-09-22 08:07:10 +00:00
|
|
|
}
|
2021-09-01 18:43:37 +00:00
|
|
|
|
|
|
|
return cmdArgs
|
|
|
|
}
|
|
|
|
|
2022-01-25 07:37:15 +00:00
|
|
|
func appendKRbdDeviceTypeAndOptions(cmdArgs []string, userOptions string) []string {
|
2021-09-01 18:43:37 +00:00
|
|
|
// Enable mapping and unmapping images from a non-initial network
|
|
|
|
// namespace (e.g. for Multus CNI). The network namespace must be
|
|
|
|
// owned by the initial user namespace.
|
2022-01-25 07:37:15 +00:00
|
|
|
cmdArgs = append(cmdArgs, "--device-type", accessTypeKRbd, "--options", "noudev")
|
2021-09-01 18:43:37 +00:00
|
|
|
|
2020-08-21 17:00:06 +00:00
|
|
|
if userOptions != "" {
|
2020-09-22 08:07:10 +00:00
|
|
|
// userOptions is appended after, possibly overriding the above
|
|
|
|
// default options.
|
2020-08-21 17:00:06 +00:00
|
|
|
cmdArgs = append(cmdArgs, "--options", userOptions)
|
|
|
|
}
|
|
|
|
|
|
|
|
return cmdArgs
|
|
|
|
}
|
|
|
|
|
rbd: add volume healer
Problem:
-------
For rbd nbd userspace mounter backends, after a restart of the nodeplugin
all the mounts will start seeing IO errors. This is because, for rbd-nbd
backends there will be a userspace mount daemon running per volume, post
restart of the nodeplugin pod, there is no way to restore the daemons
back to life.
Solution:
--------
The volume healer is a one-time activity that is triggered at the startup
time of the rbd nodeplugin. It navigates through the list of volume
attachments on the node and acts accordingly.
For now, it is limited to nbd type storage only, but it is flexible and
can be extended in the future for other backend types as needed.
From a few feets above:
This solves a severe problem for nbd backed csi volumes. The healer while
going through the list of volume attachments on the node, if finds the
volume is in attached state and is of type nbd, then it will attempt to
fix the rbd-nbd volumes by sending a NodeStageVolume request with the
required volume attributes like secrets, device name, image attributes,
and etc.. which will finally help start the required rbd-nbd daemons in
the nodeplugin csi-rbdplugin container. This will allow reattaching the
backend images with the right nbd device, thus allowing the applications
to perform IO without any interruptions even after a nodeplugin restart.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-05-31 11:13:54 +00:00
|
|
|
// appendRbdNbdCliOptions append mandatory options and convert list of useroptions
|
|
|
|
// provided for rbd integrated cli to rbd-nbd cli format specific.
|
rbd: utilize cookie support from rbd for nbd
Problem:
On remap/attach of device (i.e. nodeplugin restart), there is no way
for rbd-nbd to defend if the backend storage is matching with the initial
backend storage.
Say, if an initial map request for backend "pool1/image1" got mapped to
/dev/nbd0 and the userspace process is terminated (on nodeplugin restart).
A next remap/attach (nodeplugin start) request within reattach-timeout is
allowed to use /dev/nbd0 for a different backend "pool1/image2"
For example, an operation like below could be dangerous:
$ sudo rbd-nbd map --try-netlink rbd-pool/ext4-image
/dev/nbd0
$ sudo blkid /dev/nbd0
/dev/nbd0: UUID="bfc444b4-64b1-418f-8b36-6e0d170cfc04" TYPE="ext4"
$ sudo pkill -15 rbd-nbd <-- nodeplugin terminate
$ sudo rbd-nbd attach --try-netlink --device /dev/nbd0 rbd-pool/xfs-image
/dev/nbd0
$ sudo blkid /dev/nbd0
/dev/nbd0: UUID="d29bf343-6570-4069-a9ea-2fa156ced908" TYPE="xfs"
Solution:
rbd-nbd/kernel now provides a way to keep some metadata in sysfs to identify
between the device and the backend, so that when a remap/attach request is
made, rbd-nbd can compare and avoid such dangerous operations.
With the provided solution, as part of the initial map request, backend
cookie (ceph-csi VOLID) can be stored in the sysfs per device config, so
that on a remap/attach request rbd-nbd will check and validate if the
backend per device cookie matches with the initial map backend with the help
of cookie.
At Ceph-csi we use VOLID as device cookie, which will be unique, we pass
the VOLID as cookie at map and use the same at the time of attach, that
way rbd-nbd can identify backends and their matching devices.
Requires:
https://github.com/ceph/ceph/pull/41323
https://lkml.org/lkml/2021/4/29/274
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-09-21 13:13:10 +00:00
|
|
|
func appendRbdNbdCliOptions(cmdArgs []string, userOptions, cookie string) []string {
|
rbd: add volume healer
Problem:
-------
For rbd nbd userspace mounter backends, after a restart of the nodeplugin
all the mounts will start seeing IO errors. This is because, for rbd-nbd
backends there will be a userspace mount daemon running per volume, post
restart of the nodeplugin pod, there is no way to restore the daemons
back to life.
Solution:
--------
The volume healer is a one-time activity that is triggered at the startup
time of the rbd nodeplugin. It navigates through the list of volume
attachments on the node and acts accordingly.
For now, it is limited to nbd type storage only, but it is flexible and
can be extended in the future for other backend types as needed.
From a few feets above:
This solves a severe problem for nbd backed csi volumes. The healer while
going through the list of volume attachments on the node, if finds the
volume is in attached state and is of type nbd, then it will attempt to
fix the rbd-nbd volumes by sending a NodeStageVolume request with the
required volume attributes like secrets, device name, image attributes,
and etc.. which will finally help start the required rbd-nbd daemons in
the nodeplugin csi-rbdplugin container. This will allow reattaching the
backend images with the right nbd device, thus allowing the applications
to perform IO without any interruptions even after a nodeplugin restart.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-05-31 11:13:54 +00:00
|
|
|
if !strings.Contains(userOptions, useNbdNetlink) {
|
|
|
|
cmdArgs = append(cmdArgs, fmt.Sprintf("--%s", useNbdNetlink))
|
|
|
|
}
|
|
|
|
if !strings.Contains(userOptions, setNbdReattach) {
|
|
|
|
cmdArgs = append(cmdArgs, fmt.Sprintf("--%s=%d", setNbdReattach, defaultNbdReAttachTimeout))
|
|
|
|
}
|
2021-08-19 06:25:21 +00:00
|
|
|
if !strings.Contains(userOptions, setNbdIOTimeout) {
|
|
|
|
cmdArgs = append(cmdArgs, fmt.Sprintf("--%s=%d", setNbdIOTimeout, defaultNbdIOTimeout))
|
|
|
|
}
|
rbd: utilize cookie support from rbd for nbd
Problem:
On remap/attach of device (i.e. nodeplugin restart), there is no way
for rbd-nbd to defend if the backend storage is matching with the initial
backend storage.
Say, if an initial map request for backend "pool1/image1" got mapped to
/dev/nbd0 and the userspace process is terminated (on nodeplugin restart).
A next remap/attach (nodeplugin start) request within reattach-timeout is
allowed to use /dev/nbd0 for a different backend "pool1/image2"
For example, an operation like below could be dangerous:
$ sudo rbd-nbd map --try-netlink rbd-pool/ext4-image
/dev/nbd0
$ sudo blkid /dev/nbd0
/dev/nbd0: UUID="bfc444b4-64b1-418f-8b36-6e0d170cfc04" TYPE="ext4"
$ sudo pkill -15 rbd-nbd <-- nodeplugin terminate
$ sudo rbd-nbd attach --try-netlink --device /dev/nbd0 rbd-pool/xfs-image
/dev/nbd0
$ sudo blkid /dev/nbd0
/dev/nbd0: UUID="d29bf343-6570-4069-a9ea-2fa156ced908" TYPE="xfs"
Solution:
rbd-nbd/kernel now provides a way to keep some metadata in sysfs to identify
between the device and the backend, so that when a remap/attach request is
made, rbd-nbd can compare and avoid such dangerous operations.
With the provided solution, as part of the initial map request, backend
cookie (ceph-csi VOLID) can be stored in the sysfs per device config, so
that on a remap/attach request rbd-nbd will check and validate if the
backend per device cookie matches with the initial map backend with the help
of cookie.
At Ceph-csi we use VOLID as device cookie, which will be unique, we pass
the VOLID as cookie at map and use the same at the time of attach, that
way rbd-nbd can identify backends and their matching devices.
Requires:
https://github.com/ceph/ceph/pull/41323
https://lkml.org/lkml/2021/4/29/274
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-09-21 13:13:10 +00:00
|
|
|
if hasNBDCookieSupport {
|
|
|
|
cmdArgs = append(cmdArgs, fmt.Sprintf("--cookie=%s", cookie))
|
|
|
|
}
|
rbd: add volume healer
Problem:
-------
For rbd nbd userspace mounter backends, after a restart of the nodeplugin
all the mounts will start seeing IO errors. This is because, for rbd-nbd
backends there will be a userspace mount daemon running per volume, post
restart of the nodeplugin pod, there is no way to restore the daemons
back to life.
Solution:
--------
The volume healer is a one-time activity that is triggered at the startup
time of the rbd nodeplugin. It navigates through the list of volume
attachments on the node and acts accordingly.
For now, it is limited to nbd type storage only, but it is flexible and
can be extended in the future for other backend types as needed.
From a few feets above:
This solves a severe problem for nbd backed csi volumes. The healer while
going through the list of volume attachments on the node, if finds the
volume is in attached state and is of type nbd, then it will attempt to
fix the rbd-nbd volumes by sending a NodeStageVolume request with the
required volume attributes like secrets, device name, image attributes,
and etc.. which will finally help start the required rbd-nbd daemons in
the nodeplugin csi-rbdplugin container. This will allow reattaching the
backend images with the right nbd device, thus allowing the applications
to perform IO without any interruptions even after a nodeplugin restart.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
2021-05-31 11:13:54 +00:00
|
|
|
if userOptions != "" {
|
|
|
|
options := strings.Split(userOptions, ",")
|
|
|
|
for _, opt := range options {
|
|
|
|
cmdArgs = append(cmdArgs, fmt.Sprintf("--%s", opt))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return cmdArgs
|
|
|
|
}
|
|
|
|
|
|
|
|
// createPath maps the rbd image (or, when device is non-empty, re-attaches
// the image to that existing nbd device on behalf of the volume healer) and
// returns the resulting local device path.
//
// The mounter from volOpt selects between krbd and rbd-nbd. When the map
// fails with a connection timeout, a best-effort unmap is issued so no stale
// mapping is left behind before returning the error.
func createPath(ctx context.Context, volOpt *rbdVolume, device string, cr *util.Credentials) (string, error) {
	isNbd := false
	imagePath := volOpt.String()

	log.TraceLog(ctx, "rbd: map mon %s", volOpt.Monitors)

	// Common credential/monitor arguments shared by krbd and rbd-nbd.
	mapArgs := []string{
		"--id", cr.ID,
		"-m", volOpt.Monitors,
		"--keyfile=" + cr.KeyFile,
	}

	// Choose access protocol
	if volOpt.Mounter == rbdTonbd && hasNBD {
		isNbd = true
	}

	// rbd-nbd runs as a userspace daemon; direct its logs to a
	// per-volume log file.
	if isNbd {
		mapArgs = append(mapArgs, "--log-file",
			getCephClientLogFileName(volOpt.VolID, volOpt.LogDir, "rbd-nbd"))
	}

	cli := rbd
	if device != "" {
		// Re-attach path: bind the image back to the nbd device it was
		// previously mapped to (healer after a nodeplugin restart).
		// TODO: use rbd cli for attach/detach in the future
		cli = rbdNbdMounter
		mapArgs = append(mapArgs, "attach", imagePath, "--device", device)
		// VolID doubles as the nbd cookie so rbd-nbd can verify the
		// device/backend pairing on attach.
		mapArgs = appendRbdNbdCliOptions(mapArgs, volOpt.MapOptions, volOpt.VolID)
	} else {
		// Fresh map request.
		mapArgs = append(mapArgs, "map", imagePath)
		if isNbd {
			mapArgs = appendNbdDeviceTypeAndOptions(mapArgs, volOpt.MapOptions, volOpt.VolID)
		} else {
			mapArgs = appendKRbdDeviceTypeAndOptions(mapArgs, volOpt.MapOptions)
		}
	}

	if volOpt.readOnly {
		mapArgs = append(mapArgs, "--read-only")
	}

	// Execute map
	stdout, stderr, err := util.ExecCommand(ctx, cli, mapArgs...)
	if err != nil {
		log.WarningLog(ctx, "rbd: map error %v, rbd output: %s", err, stderr)
		// unmap rbd image if connection timeout
		if strings.Contains(err.Error(), rbdMapConnectionTimeout) {
			dArgs := detachRBDImageArgs{
				imageOrDeviceSpec: imagePath,
				isImageSpec:       true,
				isNbd:             isNbd,
				encrypted:         volOpt.isEncrypted(),
				volumeID:          volOpt.VolID,
				unmapOptions:      volOpt.UnmapOptions,
				logDir:            volOpt.LogDir,
				logStrategy:       volOpt.LogStrategy,
			}
			// Best effort: log the unmap failure but keep reporting
			// the original map error to the caller.
			detErr := detachRBDImageOrDeviceSpec(ctx, &dArgs)
			if detErr != nil {
				log.WarningLog(ctx, "rbd: %s unmap error %v", imagePath, detErr)
			}
		}

		return "", fmt.Errorf("rbd: map failed with error %w, rbd error output: %s", err, stderr)
	}
	// The CLI prints the device path followed by a trailing newline.
	devicePath := strings.TrimSuffix(stdout, "\n")

	return devicePath, nil
}
|
|
|
|
|
2020-07-22 13:33:36 +00:00
|
|
|
func waitForrbdImage(ctx context.Context, backoff wait.Backoff, volOptions *rbdVolume) error {
|
2020-05-28 18:39:44 +00:00
|
|
|
imagePath := volOptions.String()
|
2019-01-28 19:55:10 +00:00
|
|
|
|
|
|
|
err := wait.ExponentialBackoff(backoff, func() (bool, error) {
|
2020-07-22 13:33:36 +00:00
|
|
|
used, err := volOptions.isInUse()
|
2019-01-28 19:55:10 +00:00
|
|
|
if err != nil {
|
2020-07-22 13:33:36 +00:00
|
|
|
return false, fmt.Errorf("fail to check rbd image status: (%w)", err)
|
2019-01-28 19:55:10 +00:00
|
|
|
}
|
2019-03-14 00:18:04 +00:00
|
|
|
if (volOptions.DisableInUseChecks) && (used) {
|
2021-08-24 15:03:25 +00:00
|
|
|
log.UsefulLog(ctx, "valid multi-node attach requested, ignoring watcher in-use result")
|
2021-07-22 05:45:17 +00:00
|
|
|
|
2019-03-14 00:18:04 +00:00
|
|
|
return used, nil
|
|
|
|
}
|
2021-07-22 05:45:17 +00:00
|
|
|
|
2019-01-28 19:55:10 +00:00
|
|
|
return !used, nil
|
|
|
|
})
|
|
|
|
// return error if rbd image has not become available for the specified timeout
|
2020-06-11 08:04:32 +00:00
|
|
|
if errors.Is(err, wait.ErrWaitTimeout) {
|
2019-01-28 19:55:10 +00:00
|
|
|
return fmt.Errorf("rbd image %s is still being used", imagePath)
|
|
|
|
}
|
|
|
|
// return error if any other errors were encountered during waiting for the image to become available
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-09-08 05:23:28 +00:00
|
|
|
func detachRBDDevice(ctx context.Context, devicePath, volumeID, unmapOptions string, encrypted bool) error {
|
2019-08-03 22:11:28 +00:00
|
|
|
nbdType := false
|
|
|
|
if strings.HasPrefix(devicePath, "/dev/nbd") {
|
|
|
|
nbdType = true
|
|
|
|
}
|
|
|
|
|
2021-08-23 11:23:15 +00:00
|
|
|
dArgs := detachRBDImageArgs{
|
|
|
|
imageOrDeviceSpec: devicePath,
|
|
|
|
isImageSpec: false,
|
|
|
|
isNbd: nbdType,
|
|
|
|
encrypted: encrypted,
|
|
|
|
volumeID: volumeID,
|
|
|
|
unmapOptions: unmapOptions,
|
|
|
|
}
|
|
|
|
|
2021-09-01 19:33:12 +00:00
|
|
|
return detachRBDImageOrDeviceSpec(ctx, &dArgs)
|
2019-08-03 22:11:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// detachRBDImageOrDeviceSpec detaches an rbd imageSpec or devicePath, with additional checking
// when imageSpec is used to decide if image is already unmapped.
//
// For encrypted volumes an open LUKS mapper (if any) is closed first and the
// unmap is then issued against the underlying mapped device. When an image
// spec was passed in, an "image not mapped" error from rbd is treated as a
// successful detach. After a successful rbd-nbd unmap, the per-volume log
// file is processed asynchronously per the configured log strategy.
func detachRBDImageOrDeviceSpec(
	ctx context.Context,
	dArgs *detachRBDImageArgs) error {
	if dArgs.encrypted {
		mapperFile, mapperPath := util.VolumeMapper(dArgs.volumeID)
		mappedDevice, mapper, err := util.DeviceEncryptionStatus(ctx, mapperPath)
		if err != nil {
			log.ErrorLog(ctx, "error determining LUKS device on %s, %s: %s",
				mapperPath, dArgs.imageOrDeviceSpec, err)

			return err
		}
		if len(mapper) > 0 {
			// mapper found, so it is open Luks device
			err = util.CloseEncryptedVolume(ctx, mapperFile)
			if err != nil {
				log.ErrorLog(ctx, "error closing LUKS device on %s, %s: %s",
					mapperPath, dArgs.imageOrDeviceSpec, err)

				return err
			}
			// unmap the device that backed the (now closed) LUKS mapping
			dArgs.imageOrDeviceSpec = mappedDevice
		}
	}

	unmapArgs := []string{"unmap", dArgs.imageOrDeviceSpec}
	if dArgs.isNbd {
		unmapArgs = appendNbdDeviceTypeAndOptions(unmapArgs, dArgs.unmapOptions, dArgs.volumeID)
	} else {
		unmapArgs = appendKRbdDeviceTypeAndOptions(unmapArgs, dArgs.unmapOptions)
	}

	_, stderr, err := util.ExecCommand(ctx, rbd, unmapArgs...)
	if err != nil {
		// Messages for krbd and nbd differ, hence checking either of them for missing mapping
		// This is not applicable when a device path is passed in
		if dArgs.isImageSpec &&
			(strings.Contains(stderr, fmt.Sprintf(rbdUnmapCmdkRbdMissingMap, dArgs.imageOrDeviceSpec)) ||
				strings.Contains(stderr, fmt.Sprintf(rbdUnmapCmdNbdMissingMap, dArgs.imageOrDeviceSpec))) {
			// Devices found not to be mapped are treated as a successful detach
			log.TraceLog(ctx, "image or device spec (%s) not mapped", dArgs.imageOrDeviceSpec)

			return nil
		}

		return fmt.Errorf("rbd: unmap for spec (%s) failed (%w): (%s)", dArgs.imageOrDeviceSpec, err, stderr)
	}
	if dArgs.isNbd && dArgs.logDir != "" {
		// handle the rbd-nbd log file (remove/compress/preserve) off the hot path
		logFile := getCephClientLogFileName(dArgs.volumeID, dArgs.logDir, "rbd-nbd")
		go strategicActionOnLogFile(ctx, dArgs.logStrategy, logFile)
	}

	return nil
}
|