rbd: add capability to automatically enable read affinity

This commit uses the CRUSH location derived from node
labels to supply the `crush_location` and `read_from_replica=localize`
options to the rbd map command. With these options, Ceph
can redirect reads to the closest OSD,
improving performance.

Signed-off-by: Rakshith R <rar@redhat.com>
This commit is contained in:
Rakshith R
2023-02-02 15:16:37 +05:30
committed by mergify[bot]
parent 6e6cddb096
commit 95682522ee
7 changed files with 401 additions and 21 deletions

View File

@ -71,11 +71,19 @@ func NewReplicationServer(c *rbd.ControllerServer) *rbd.ReplicationServer {
}
// NewNodeServer initializes a node server for the rbd CSI driver.
func NewNodeServer(d *csicommon.CSIDriver, t string, topology map[string]string) (*rbd.NodeServer, error) {
return &rbd.NodeServer{
func NewNodeServer(
d *csicommon.CSIDriver,
t string,
topology map[string]string,
crushLocationMap map[string]string,
) (*rbd.NodeServer, error) {
ns := rbd.NodeServer{
DefaultNodeServer: csicommon.NewDefaultNodeServer(d, t, topology),
VolumeLocks: util.NewVolumeLocks(),
}, nil
}
ns.SetReadAffinityMapOptions(crushLocationMap)
return &ns, nil
}
// Run starts a non-blocking gRPC controller, node and identity server for
@ -84,9 +92,10 @@ func NewNodeServer(d *csicommon.CSIDriver, t string, topology map[string]string)
// This also configures and starts a new CSI-Addons service, by calling
// setupCSIAddonsServer().
func (r *Driver) Run(conf *util.Config) {
var err error
var topology map[string]string
var (
err error
topology, crushLocationMap map[string]string
)
// update clone soft and hard limit
rbd.SetGlobalInt("rbdHardMaxCloneDepth", conf.RbdHardMaxCloneDepth)
rbd.SetGlobalInt("rbdSoftMaxCloneDepth", conf.RbdSoftMaxCloneDepth)
@ -128,6 +137,13 @@ func (r *Driver) Run(conf *util.Config) {
})
}
if conf.EnableReadAffinity {
crushLocationMap, err = util.GetCrushLocationMap(conf.CrushLocationLabels, conf.NodeID)
if err != nil {
log.FatalLogMsg(err.Error())
}
}
// Create GRPC servers
r.ids = NewIdentityServer(r.cd)
@ -136,7 +152,7 @@ func (r *Driver) Run(conf *util.Config) {
if err != nil {
log.FatalLogMsg(err.Error())
}
r.ns, err = NewNodeServer(r.cd, conf.Vtype, topology)
r.ns, err = NewNodeServer(r.cd, conf.Vtype, topology, crushLocationMap)
if err != nil {
log.FatalLogMsg("failed to start node server, err %v\n", err)
}
@ -163,17 +179,6 @@ func (r *Driver) Run(conf *util.Config) {
"and replaced by CSI-Addons, see https://github.com/ceph/ceph-csi/issues/3314 for more details")
r.rs = NewReplicationServer(r.cs)
}
if !conf.IsControllerServer && !conf.IsNodeServer {
topology, err = util.GetTopologyFromDomainLabels(conf.DomainLabels, conf.NodeID, conf.DriverName)
if err != nil {
log.FatalLogMsg(err.Error())
}
r.ns, err = NewNodeServer(r.cd, conf.Vtype, topology)
if err != nil {
log.FatalLogMsg("failed to start node server, err %v\n", err)
}
r.cs = NewControllerServer(r.cd)
}
s := csicommon.NewNonBlockingGRPCServer()
srv := csicommon.Servers{

View File

@ -45,6 +45,8 @@ type NodeServer struct {
// A map storing all volumes with ongoing operations so that additional operations
// for that same volume (as defined by VolumeID) return an Aborted error
VolumeLocks *util.VolumeLocks
// readAffinityMapOptions contains map options to enable read affinity.
readAffinityMapOptions string
}
// stageTransaction struct represents the state a transaction was when it either completed
@ -143,7 +145,7 @@ func healerStageTransaction(ctx context.Context, cr *util.Credentials, volOps *r
// this function also receive the credentials and secrets args as it differs in its data.
// The credentials are used directly by functions like voljournal.Connect() and other functions
// like genVolFromVolumeOptions() make use of secrets.
func populateRbdVol(
func (ns *NodeServer) populateRbdVol(
ctx context.Context,
req *csi.NodeStageVolumeRequest,
cr *util.Credentials,
@ -250,6 +252,7 @@ func populateRbdVol(
if err != nil {
return nil, err
}
ns.appendReadAffinityMapOptions(rv)
rv.VolID = volID
@ -265,6 +268,19 @@ func populateRbdVol(
return rv, err
}
// appendReadAffinityMapOptions appends the node server's
// readAffinityMapOptions to rv.MapOptions.
//
// Nothing is changed when readAffinityMapOptions is empty or when rv.Mounter
// is not rbdDefaultMounter. Otherwise the options are appended to any existing
// map options using "," as separator, or become the map options when
// rv.MapOptions is empty.
//
// The method uses a pointer receiver, like the other NodeServer methods, so
// the NodeServer struct is not copied on every call.
func (ns *NodeServer) appendReadAffinityMapOptions(rv *rbdVolume) {
	switch {
	case ns.readAffinityMapOptions == "" || rv.Mounter != rbdDefaultMounter:
		// read affinity is not configured, or the mounter is not the default one.
		return
	case rv.MapOptions != "":
		rv.MapOptions += "," + ns.readAffinityMapOptions
	default:
		rv.MapOptions = ns.readAffinityMapOptions
	}
}
// NodeStageVolume mounts the volume to a staging path on the node.
// Implementation notes:
// - stagingTargetPath is the directory passed in the request where the volume needs to be staged
@ -318,7 +334,7 @@ func (ns *NodeServer) NodeStageVolume(
}
isStaticVol := parseBoolOption(ctx, req.GetVolumeContext(), staticVol, false)
rv, err := populateRbdVol(ctx, req, cr)
rv, err := ns.populateRbdVol(ctx, req, cr)
if err != nil {
return nil, err
}
@ -1349,3 +1365,22 @@ func getDeviceSize(ctx context.Context, devicePath string) (uint64, error) {
return size, nil
}
// SetReadAffinityMapOptions builds the map options that enable read affinity
// from crushLocationMap and stores them in ns.readAffinityMapOptions.
//
// The stored value has the form
// "read_from_replica=localize,crush_location=<key>:<value>|<key>:<value>...".
// Entries are emitted in map iteration order, so the order of the
// "<key>:<value>" pairs is not fixed when the map has more than one entry.
//
// A nil or empty crushLocationMap leaves readAffinityMapOptions untouched.
func (ns *NodeServer) SetReadAffinityMapOptions(crushLocationMap map[string]string) {
	if len(crushLocationMap) == 0 {
		return
	}

	// one "<key>:<value>" element per crush location entry.
	pairs := make([]string, 0, len(crushLocationMap))
	for bucket, name := range crushLocationMap {
		pairs = append(pairs, fmt.Sprintf("%s:%s", bucket, name))
	}

	ns.readAffinityMapOptions = "read_from_replica=localize,crush_location=" + strings.Join(pairs, "|")
}

View File

@ -21,6 +21,7 @@ import (
"testing"
"github.com/container-storage-interface/spec/lib/go/csi"
"github.com/stretchr/testify/assert"
)
func TestGetStagingPath(t *testing.T) {
@ -105,3 +106,141 @@ func TestParseBoolOption(t *testing.T) {
}
}
}
// TestNodeServer_SetReadAffinityMapOptions checks the value stored in
// NodeServer.readAffinityMapOptions after SetReadAffinityMapOptions is called
// on a fresh NodeServer with nil, empty, single-entry and multi-entry crush
// location maps.
func TestNodeServer_SetReadAffinityMapOptions(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name     string
		location map[string]string
		// accepted lists every acceptable result; the multi-entry case has
		// one element per possible ordering of the map entries.
		accepted []string
	}{
		{
			name:     "nil crushLocationmap",
			location: nil,
			accepted: []string{""},
		},
		{
			name:     "empty crushLocationmap",
			location: map[string]string{},
			accepted: []string{""},
		},
		{
			name:     "single entry in crushLocationmap",
			location: map[string]string{"region": "east"},
			accepted: []string{"read_from_replica=localize,crush_location=region:east"},
		},
		{
			name: "multiple entries in crushLocationmap",
			location: map[string]string{
				"region": "east",
				"zone":   "east-1",
			},
			accepted: []string{
				"read_from_replica=localize,crush_location=region:east|zone:east-1",
				"read_from_replica=localize,crush_location=zone:east-1|region:east",
			},
		},
	}

	for i := range cases {
		// per-iteration copy so the parallel subtest does not share the loop state.
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			server := new(NodeServer)
			server.SetReadAffinityMapOptions(tc.location)

			assert.Contains(t, tc.accepted, server.readAffinityMapOptions)
		})
	}
}
// TestNodeServer_appendReadAffinityMapOptions checks the resulting
// rbdVolume.MapOptions after appendReadAffinityMapOptions is called, for each
// combination of existing map options, configured read affinity options and
// mounter.
func TestNodeServer_appendReadAffinityMapOptions(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name            string
		mapOptions      string
		affinityOptions string
		mounter         string
		expected        string
	}{
		{
			name:            "both empty mapOptions and crushLocationMap",
			mapOptions:      "",
			affinityOptions: "",
			mounter:         rbdDefaultMounter,
			expected:        "",
		},
		{
			name:            "empty mapOptions, filled crushLocationMap & default mounter",
			mapOptions:      "",
			affinityOptions: "read_from_replica=localize,crush_location=region:west",
			mounter:         rbdDefaultMounter,
			expected:        "read_from_replica=localize,crush_location=region:west",
		},
		{
			name:            "empty mapOptions, filled crushLocationMap & non-default mounter",
			mapOptions:      "",
			affinityOptions: "read_from_replica=localize,crush_location=region:west",
			mounter:         rbdNbdMounter,
			expected:        "",
		},
		{
			name:            "filled mapOptions, filled crushLocationMap & default mounter",
			mapOptions:      "notrim",
			affinityOptions: "read_from_replica=localize,crush_location=region:west",
			mounter:         rbdDefaultMounter,
			expected:        "notrim,read_from_replica=localize,crush_location=region:west",
		},
		{
			name:            "filled mapOptions, filled crushLocationMap & non-default mounter",
			mapOptions:      "notrim",
			affinityOptions: "read_from_replica=localize,crush_location=region:west",
			mounter:         rbdNbdMounter,
			expected:        "notrim",
		},
		{
			name:            "filled mapOptions, empty readAffinityMapOptions & default mounter",
			mapOptions:      "notrim",
			affinityOptions: "",
			mounter:         rbdDefaultMounter,
			expected:        "notrim",
		},
		{
			name:            "filled mapOptions, empty readAffinityMapOptions & non-default mounter",
			mapOptions:      "notrim",
			affinityOptions: "",
			mounter:         rbdNbdMounter,
			expected:        "notrim",
		},
	}

	for i := range cases {
		// per-iteration copy so the parallel subtest does not share the loop state.
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			server := &NodeServer{readAffinityMapOptions: tc.affinityOptions}
			volume := &rbdVolume{
				MapOptions: tc.mapOptions,
				Mounter:    tc.mounter,
			}

			server.appendReadAffinityMapOptions(volume)

			assert.Equal(t, tc.expected, volume.MapOptions)
		})
	}
}