rbd: add support for rbd striping

RBD supports striping: an image can be created with a
custom object size, stripe unit and stripe count.
This PR adds support for setting these options when
creating rbd images.

More details about striping at
https://docs.ceph.com/en/quincy/man/8/rbd/#striping

fixes: #3124

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
This commit is contained in:
Madhu Rajanna 2022-05-24 09:08:42 +05:30 committed by mergify[bot]
parent 8f99fe7250
commit 4b57cc3ec5
7 changed files with 447 additions and 36 deletions

View File

@ -66,6 +66,9 @@ make image-cephcsi
| `mounter` | no | if set to `rbd-nbd`, use `rbd-nbd` on nodes that have `rbd-nbd` and `nbd` kernel modules to map rbd images | | `mounter` | no | if set to `rbd-nbd`, use `rbd-nbd` on nodes that have `rbd-nbd` and `nbd` kernel modules to map rbd images |
| `encrypted` | no | disabled by default, use `"true"` to enable LUKS encryption on PVC and `"false"` to disable it. **Do not change for existing storageclasses** | | `encrypted` | no | disabled by default, use `"true"` to enable LUKS encryption on PVC and `"false"` to disable it. **Do not change for existing storageclasses** |
| `encryptionKMSID` | no | required if encryption is enabled and a kms is used to store passphrases | | `encryptionKMSID` | no | required if encryption is enabled and a kms is used to store passphrases |
| `stripeUnit` | no | stripe unit in bytes |
| `stripeCount` | no | objects to stripe over before looping |
| `objectSize` | no | object size in bytes |
**NOTE:** An accompanying CSI configuration file, needs to be provided to the **NOTE:** An accompanying CSI configuration file, needs to be provided to the
running pods. Refer to [Creating CSI configuration](../examples/README.md#creating-csi-configuration) running pods. Refer to [Creating CSI configuration](../examples/README.md#creating-csi-configuration)

View File

@ -4080,6 +4080,153 @@ var _ = Describe("RBD", func() {
}) })
}) })
By("validate rbd image stripe", func() {
	// Striping values requested through the storageclass; every image
	// validated below is compared against these.
	stripeUnit := 4096
	stripeCount := 8
	objectSize := 131072

	// Replace the default storageclass with one carrying the striping
	// parameters.
	err := deleteResource(rbdExamplePath + "storageclass.yaml")
	if err != nil {
		e2elog.Failf("failed to delete storageclass: %v", err)
	}

	err = createRBDStorageClass(
		f.ClientSet,
		f,
		defaultSCName,
		nil,
		map[string]string{
			"stripeUnit":  fmt.Sprintf("%d", stripeUnit),
			"stripeCount": fmt.Sprintf("%d", stripeCount),
			"objectSize":  fmt.Sprintf("%d", objectSize),
		},
		deletePolicy)
	if err != nil {
		e2elog.Failf("failed to create storageclass: %v", err)
	}
	// Recreate the storageclass without striping parameters once this
	// step is done.
	defer func() {
		err = deleteResource(rbdExamplePath + "storageclass.yaml")
		if err != nil {
			e2elog.Failf("failed to delete storageclass: %v", err)
		}
		err = createRBDStorageClass(f.ClientSet, f, defaultSCName, nil, nil, deletePolicy)
		if err != nil {
			e2elog.Failf("failed to create storageclass: %v", err)
		}
	}()

	err = createRBDSnapshotClass(f)
	if err != nil {
		e2elog.Failf("failed to create VolumeSnapshotClass: %v", err)
	}
	defer func() {
		err = deleteRBDSnapshotClass()
		if err != nil {
			e2elog.Failf("failed to delete VolumeSnapshotClass: %v", err)
		}
	}()

	// create the parent PVC
	pvc, err := loadPVC(pvcPath)
	if err != nil {
		e2elog.Failf("failed to load PVC: %v", err)
	}
	pvc.Namespace = f.UniqueName

	err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout)
	if err != nil {
		e2elog.Failf("failed to create PVC: %v", err)
	}
	// validate created backend rbd images
	validateRBDImageCount(f, 1, defaultRBDPool)
	validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType)
	err = validateStripe(f, pvc, stripeUnit, stripeCount, objectSize)
	if err != nil {
		e2elog.Failf("failed to validate stripe: %v", err)
	}

	// snapshot the parent PVC
	snap := getSnapshot(snapshotPath)
	snap.Namespace = f.UniqueName
	snap.Spec.Source.PersistentVolumeClaimName = &pvc.Name

	err = createSnapshot(&snap, deployTimeout)
	if err != nil {
		e2elog.Failf("failed to create snapshot: %v", err)
	}
	// validate created backend rbd images
	// parent PVC + snapshot
	totalImages := 2
	validateRBDImageCount(f, totalImages, defaultRBDPool)
	validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType)
	validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType)

	pvcClone, err := loadPVC(pvcClonePath)
	if err != nil {
		e2elog.Failf("failed to load PVC: %v", err)
	}

	// create clone PVC as ROX
	pvcClone.Namespace = f.UniqueName
	pvcClone.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany}
	err = createPVCAndvalidatePV(f.ClientSet, pvcClone, deployTimeout)
	if err != nil {
		e2elog.Failf("failed to create PVC: %v", err)
	}
	// validate created backend rbd images
	// parent pvc + snapshot + clone
	totalImages = 3
	validateRBDImageCount(f, totalImages, defaultRBDPool)
	validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType)
	validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType)
	err = validateStripe(f, pvcClone, stripeUnit, stripeCount, objectSize)
	if err != nil {
		e2elog.Failf("failed to validate stripe for clone: %v", err)
	}

	// delete snapshot
	err = deleteSnapshot(&snap, deployTimeout)
	if err != nil {
		e2elog.Failf("failed to delete snapshot: %v", err)
	}

	// delete clone pvc
	err = deletePVCAndValidatePV(f.ClientSet, pvcClone, deployTimeout)
	if err != nil {
		e2elog.Failf("failed to delete PVC: %v", err)
	}

	// create a PVC from pvcSmartClonePath and check that it carries the
	// same striping values
	pvcSmartClone, err := loadPVC(pvcSmartClonePath)
	if err != nil {
		e2elog.Failf("failed to load pvcSmartClone: %v", err)
	}
	pvcSmartClone.Namespace = f.UniqueName

	err = createPVCAndvalidatePV(f.ClientSet, pvcSmartClone, deployTimeout)
	if err != nil {
		e2elog.Failf("failed to create pvc: %v", err)
	}
	// validate created backend rbd images
	// parent pvc + temp clone + clone
	totalImages = 3
	validateRBDImageCount(f, totalImages, defaultRBDPool)
	validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType)
	err = validateStripe(f, pvcSmartClone, stripeUnit, stripeCount, objectSize)
	if err != nil {
		e2elog.Failf("failed to validate stripe for clone: %v", err)
	}

	// delete parent pvc
	err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout)
	if err != nil {
		e2elog.Failf("failed to delete PVC: %v", err)
	}

	// delete clone pvc
	err = deletePVCAndValidatePV(f.ClientSet, pvcSmartClone, deployTimeout)
	if err != nil {
		e2elog.Failf("failed to delete PVC: %v", err)
	}
	// validate created backend rbd images
	validateRBDImageCount(f, 0, defaultRBDPool)
	validateOmapCount(f, 0, rbdType, defaultRBDPool, volumesType)
})
// Make sure this should be last testcase in this file, because // Make sure this should be last testcase in this file, because
// it deletes pool // it deletes pool
By("Create a PVC and delete PVC when backend pool deleted", func() { By("Create a PVC and delete PVC when backend pool deleted", func() {

View File

@ -942,3 +942,69 @@ func waitToRemoveImagesFromTrash(f *framework.Framework, poolName string, t int)
return err return err
} }
// imageInfo is the strongly typed subset of the JSON image information that
// the striping checks decode.
type imageInfo struct {
	// Name is read from the "name" key.
	Name string `json:"name"`
	// StripeUnit is read from the "stripe_unit" key.
	StripeUnit int `json:"stripe_unit"`
	// StripeCount is read from the "stripe_count" key.
	StripeCount int `json:"stripe_count"`
	// ObjectSize is read from the "object_size" key.
	ObjectSize int `json:"object_size"`
}
// getImageInfo runs `rbd info --format json` for imageName in poolName through
// execCommandInToolBoxPod and decodes the output into an imageInfo. An error
// is returned when the command fails, when it writes anything to stderr, or
// when its output cannot be unmarshalled.
func getImageInfo(f *framework.Framework, imageName, poolName string) (imageInfo, error) {
	// rbd --format=json info [image-spec | snap-spec]
	cmd := fmt.Sprintf("rbd info %s %s --format json", rbdOptions(poolName), imageName)

	stdOut, stdErr, err := execCommandInToolBoxPod(f, cmd, rookNamespace)
	if err != nil {
		return imageInfo{}, fmt.Errorf("failed to get rbd info: %w", err)
	}
	// Any stderr output is treated as a failure, even when the command
	// itself reported no error.
	if stdErr != "" {
		return imageInfo{}, fmt.Errorf("failed to get rbd info: %v", stdErr)
	}

	var info imageInfo
	if err = json.Unmarshal([]byte(stdOut), &info); err != nil {
		return info, fmt.Errorf("unmarshal failed: %w. raw buffer response: %s",
			err, stdOut)
	}

	return info, nil
}
// validateStripe looks up the rbd image backing pvc and compares its object
// size, stripe unit and stripe count (in that order) with the expected values.
// The first mismatch, or any failure to look up the image, is returned as an
// error.
func validateStripe(f *framework.Framework,
	pvc *v1.PersistentVolumeClaim,
	stripeUnit,
	stripeCount,
	objectSize int,
) error {
	imageData, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
	if err != nil {
		return err
	}

	got, err := getImageInfo(f, imageData.imageName, defaultRBDPool)
	if err != nil {
		return err
	}

	switch {
	case got.ObjectSize != objectSize:
		return fmt.Errorf("objectSize %d does not match expected %d", got.ObjectSize, objectSize)
	case got.StripeUnit != stripeUnit:
		return fmt.Errorf("stripeUnit %d does not match expected %d", got.StripeUnit, stripeUnit)
	case got.StripeCount != stripeCount:
		return fmt.Errorf("stripeCount %d does not match expected %d", got.StripeCount, stripeCount)
	}

	return nil
}

View File

@ -134,6 +134,14 @@ parameters:
# {"domainLabel":"zone","value":"zone1"}]} # {"domainLabel":"zone","value":"zone1"}]}
# ] # ]
# Image striping. Refer to https://docs.ceph.com/en/latest/man/8/rbd/#striping
# for more details.
# (optional) stripe unit in bytes.
# stripeUnit: <>
# (optional) objects to stripe over before looping.
# stripeCount: <>
# (optional) The object size in bytes.
# objectSize: <>
reclaimPolicy: Delete reclaimPolicy: Delete
allowVolumeExpansion: true allowVolumeExpansion: true
mountOptions: mountOptions:

View File

@ -20,6 +20,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"strconv"
csicommon "github.com/ceph/ceph-csi/internal/csi-common" csicommon "github.com/ceph/ceph-csi/internal/csi-common"
"github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util"
@ -94,6 +95,43 @@ func (cs *ControllerServer) validateVolumeReq(ctx context.Context, req *csi.Crea
return err return err
} }
err = validateStriping(req.Parameters)
if err != nil {
return status.Error(codes.InvalidArgument, err.Error())
}
return nil
}
// validateStriping checks the striping related entries of parameters.
//
// An empty or missing value is treated as "not specified". The rules are:
//   - stripeUnit and stripeCount must be specified together, or not at all;
//   - when specified, stripeUnit and stripeCount must be base-10 unsigned
//     integers, so that malformed values are rejected here instead of
//     surfacing later when the values are parsed for image creation;
//   - when specified, objectSize must be a base-10 unsigned integer that is a
//     non-zero power of 2.
//
// It returns nil when all rules hold, otherwise an error describing the first
// violated rule.
func validateStriping(parameters map[string]string) error {
	stripeUnit := parameters["stripeUnit"]
	stripeCount := parameters["stripeCount"]
	if stripeUnit != "" && stripeCount == "" {
		return errors.New("stripeCount must be specified when stripeUnit is specified")
	}

	if stripeUnit == "" && stripeCount != "" {
		return errors.New("stripeUnit must be specified when stripeCount is specified")
	}

	// After the checks above both values are either set or both are empty.
	if stripeUnit != "" {
		if _, err := strconv.ParseUint(stripeUnit, 10, 64); err != nil {
			return fmt.Errorf("failed to parse stripeUnit %s: %w", stripeUnit, err)
		}
		if _, err := strconv.ParseUint(stripeCount, 10, 64); err != nil {
			return fmt.Errorf("failed to parse stripeCount %s: %w", stripeCount, err)
		}
	}

	objectSize := parameters["objectSize"]
	if objectSize != "" {
		objSize, err := strconv.ParseUint(objectSize, 10, 64)
		if err != nil {
			return fmt.Errorf("failed to parse objectSize %s: %w", objectSize, err)
		}
		// check objectSize is power of 2
		/*
			Take 2^3=8 for example.
			x & (x-1)
			8 & 7
			1000 & 0111 = 0000
		*/
		if objSize == 0 || (objSize&(objSize-1)) != 0 {
			return fmt.Errorf("objectSize %s is not power of 2", objectSize)
		}
	}

	return nil
}

View File

@ -0,0 +1,88 @@
/*
Copyright 2022 The Ceph-CSI Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package rbd
import "testing"
// TestValidateStriping runs validateStriping against a table of parameter
// maps and checks only whether an error is returned, not its content.
func TestValidateStriping(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name       string
		parameters map[string]string
		wantErr    bool
	}

	cases := []testCase{
		{
			name: "when stripeUnit is not specified",
			parameters: map[string]string{
				"stripeUnit":  "",
				"stripeCount": "10",
				"objectSize":  "2",
			},
			wantErr: true,
		},
		{
			name: "when stripeCount is not specified",
			parameters: map[string]string{
				"stripeUnit":  "4096",
				"stripeCount": "",
				"objectSize":  "2",
			},
			wantErr: true,
		},
		{
			name: "when objectSize is not power of 2",
			parameters: map[string]string{
				"stripeUnit":  "4096",
				"stripeCount": "8",
				"objectSize":  "3",
			},
			wantErr: true,
		},
		{
			name: "when objectSize is 0",
			parameters: map[string]string{
				"stripeUnit":  "4096",
				"stripeCount": "8",
				"objectSize":  "0",
			},
			wantErr: true,
		},
		{
			name: "when valid stripe parameters are specified",
			parameters: map[string]string{
				"stripeUnit":  "4096",
				"stripeCount": "8",
				"objectSize":  "131072",
			},
			wantErr: false,
		},
		{
			name:       "when no stripe parameters are specified",
			parameters: map[string]string{},
			wantErr:    false,
		},
	}

	for i := range cases {
		// Copy the case so that each parallel subtest keeps its own value.
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			err := validateStriping(tc.parameters)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("validateStriping() error = %v, wantErr %v", err, tc.wantErr)
			}
		})
	}
}

View File

@ -21,6 +21,7 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"math"
"os" "os"
"path/filepath" "path/filepath"
"strconv" "strconv"
@ -99,6 +100,11 @@ type rbdImage struct {
// VolSize is the size of the RBD image backing this rbdImage. // VolSize is the size of the RBD image backing this rbdImage.
VolSize int64 VolSize int64
// image striping configurations.
StripeCount uint64
StripeUnit uint64
ObjectSize uint64
Monitors string Monitors string
// JournalPool is the ceph pool in which the CSI Journal/CSI snapshot Journal is // JournalPool is the ceph pool in which the CSI Journal/CSI snapshot Journal is
// stored // stored
@ -408,27 +414,19 @@ func (rs *rbdSnapshot) String() string {
// createImage creates a new ceph image with provision and volume options. // createImage creates a new ceph image with provision and volume options.
func createImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) error { func createImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) error {
volSzMiB := fmt.Sprintf("%dM", util.RoundOffVolSize(pOpts.VolSize)) volSzMiB := fmt.Sprintf("%dM", util.RoundOffVolSize(pOpts.VolSize))
options := librbd.NewRbdImageOptions()
logMsg := "rbd: create %s size %s (features: %s) using mon %s" log.DebugLog(ctx, "rbd: create %s size %s (features: %s) using mon %s",
if pOpts.DataPool != "" {
logMsg += fmt.Sprintf(", data pool %s", pOpts.DataPool)
err := options.SetString(librbd.RbdImageOptionDataPool, pOpts.DataPool)
if err != nil {
return fmt.Errorf("failed to set data pool: %w", err)
}
}
log.DebugLog(ctx, logMsg,
pOpts, volSzMiB, pOpts.ImageFeatureSet.Names(), pOpts.Monitors) pOpts, volSzMiB, pOpts.ImageFeatureSet.Names(), pOpts.Monitors)
if pOpts.ImageFeatureSet != 0 { options := librbd.NewRbdImageOptions()
err := options.SetUint64(librbd.RbdImageOptionFeatures, uint64(pOpts.ImageFeatureSet)) defer options.Destroy()
if err != nil {
return fmt.Errorf("failed to set image features: %w", err) err := pOpts.setImageOptions(ctx, options)
} if err != nil {
return err
} }
err := pOpts.Connect(cr) err = pOpts.Connect(cr)
if err != nil { if err != nil {
return err return err
} }
@ -1280,9 +1278,40 @@ func genVolFromVolumeOptions(
rbdVol.Mounter) rbdVol.Mounter)
rbdVol.DisableInUseChecks = disableInUseChecks rbdVol.DisableInUseChecks = disableInUseChecks
err = rbdVol.setStripeConfiguration(volOptions)
if err != nil {
return nil, err
}
return rbdVol, nil return rbdVol, nil
} }
// setStripeConfiguration parses the "stripeUnit", "stripeCount" and
// "objectSize" entries of options as base-10 unsigned integers and stores them
// in ri.StripeUnit, ri.StripeCount and ri.ObjectSize.
//
// A missing or empty entry is treated as "not specified" and leaves the
// corresponding field untouched. This matches validateStriping, which also
// treats an empty value as unset; without it a parameter that is present but
// empty would pass validation and then fail to parse here.
//
// It returns an error naming the first entry that cannot be parsed; in that
// case the field for that entry is not modified.
func (ri *rbdImage) setStripeConfiguration(options map[string]string) error {
	settings := []struct {
		key   string
		field *uint64
	}{
		{key: "stripeUnit", field: &ri.StripeUnit},
		{key: "stripeCount", field: &ri.StripeCount},
		{key: "objectSize", field: &ri.ObjectSize},
	}

	for _, s := range settings {
		val := options[s.key]
		if val == "" {
			continue
		}

		parsed, err := strconv.ParseUint(val, 10, 64)
		if err != nil {
			return fmt.Errorf("failed to parse %s %s: %w", s.key, val, err)
		}
		*s.field = parsed
	}

	return nil
}
func (rv *rbdVolume) validateImageFeatures(imageFeatures string) error { func (rv *rbdVolume) validateImageFeatures(imageFeatures string) error {
// It is possible for image features to be an empty string which // It is possible for image features to be an empty string which
// the Go split function would return a single item array with // the Go split function would return a single item array with
@ -1384,7 +1413,8 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
parentVol *rbdVolume, parentVol *rbdVolume,
) error { ) error {
var err error var err error
logMsg := "rbd: clone %s %s (features: %s) using mon %s" log.DebugLog(ctx, "rbd: clone %s %s (features: %s) using mon %s",
pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors)
err = parentVol.openIoctx() err = parentVol.openIoctx()
if err != nil { if err != nil {
@ -1397,30 +1427,15 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
options := librbd.NewRbdImageOptions() options := librbd.NewRbdImageOptions()
defer options.Destroy() defer options.Destroy()
err = rv.setImageOptions(ctx, options)
if rv.DataPool != "" { if err != nil {
logMsg += fmt.Sprintf(", data pool %s", rv.DataPool) return err
err = options.SetString(librbd.RbdImageOptionDataPool, rv.DataPool)
if err != nil {
return fmt.Errorf("failed to set data pool: %w", err)
}
}
log.DebugLog(ctx, logMsg,
pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors)
if rv.ImageFeatureSet != 0 {
err = options.SetUint64(librbd.RbdImageOptionFeatures, uint64(rv.ImageFeatureSet))
if err != nil {
return fmt.Errorf("failed to set image features: %w", err)
}
} }
err = options.SetUint64(librbd.ImageOptionCloneFormat, 2) err = options.SetUint64(librbd.ImageOptionCloneFormat, 2)
if err != nil { if err != nil {
return fmt.Errorf("failed to set image features: %w", err) return err
} }
// As the clone is yet to be created, open the Ioctx. // As the clone is yet to be created, open the Ioctx.
err = rv.openIoctx() err = rv.openIoctx()
if err != nil { if err != nil {
@ -1461,6 +1476,52 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
return nil return nil
} }
// setImageOptions copies the image settings held by rv into options: the data
// pool (when set), the image feature set (when non-zero), the stripe count and
// stripe unit (when the stripe count is non-zero) and the object order derived
// from the object size (when non-zero). A summary of the applied settings is
// written to the debug log. The first option that cannot be set is returned as
// an error.
func (rv *rbdVolume) setImageOptions(ctx context.Context, options *librbd.ImageOptions) error {
	logMsg := fmt.Sprintf("setting image options on %s", rv)

	if rv.DataPool != "" {
		logMsg += fmt.Sprintf(", data pool %s", rv.DataPool)
		if err := options.SetString(librbd.RbdImageOptionDataPool, rv.DataPool); err != nil {
			return fmt.Errorf("failed to set data pool: %w", err)
		}
	}

	if rv.ImageFeatureSet != 0 {
		if err := options.SetUint64(librbd.RbdImageOptionFeatures, uint64(rv.ImageFeatureSet)); err != nil {
			return fmt.Errorf("failed to set image features: %w", err)
		}
	}

	// Stripe unit is only applied together with a non-zero stripe count.
	if rv.StripeCount != 0 {
		logMsg += fmt.Sprintf(", stripe count %d, stripe unit %d", rv.StripeCount, rv.StripeUnit)
		if err := options.SetUint64(librbd.RbdImageOptionStripeCount, rv.StripeCount); err != nil {
			return fmt.Errorf("failed to set stripe count: %w", err)
		}
		if err := options.SetUint64(librbd.RbdImageOptionStripeUnit, rv.StripeUnit); err != nil {
			return fmt.Errorf("failed to set stripe unit: %w", err)
		}
	}

	if rv.ObjectSize != 0 {
		// The object size is passed as an order: log2 of the size,
		// truncated to an integer.
		order := uint64(math.Log2(float64(rv.ObjectSize)))
		logMsg += fmt.Sprintf(", object size %d, order %d", rv.ObjectSize, order)
		if err := options.SetUint64(librbd.RbdImageOptionOrder, order); err != nil {
			return fmt.Errorf("failed to set object size: %w", err)
		}
	}

	log.DebugLog(ctx, logMsg)

	return nil
}
// getImageInfo queries rbd about the given image and returns its metadata, and returns // getImageInfo queries rbd about the given image and returns its metadata, and returns
// ErrImageNotFound if provided image is not found. // ErrImageNotFound if provided image is not found.
func (ri *rbdImage) getImageInfo() error { func (ri *rbdImage) getImageInfo() error {