mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-06-13 02:33:34 +00:00
rbd: add capability to automatically enable read affinity
This commit derives the CRUSH location from the node's labels and uses it to supply the `crush_location` and `read_from_replica=localize` options to the `rbd map` command. With these options, Ceph is able to redirect reads to the closest OSD, improving performance. Signed-off-by: Rakshith R <rar@redhat.com>
This commit is contained in:
@ -71,11 +71,19 @@ func NewReplicationServer(c *rbd.ControllerServer) *rbd.ReplicationServer {
|
||||
}
|
||||
|
||||
// NewNodeServer initializes a node server for the rbd CSI driver.
|
||||
func NewNodeServer(d *csicommon.CSIDriver, t string, topology map[string]string) (*rbd.NodeServer, error) {
|
||||
return &rbd.NodeServer{
|
||||
func NewNodeServer(
|
||||
d *csicommon.CSIDriver,
|
||||
t string,
|
||||
topology map[string]string,
|
||||
crushLocationMap map[string]string,
|
||||
) (*rbd.NodeServer, error) {
|
||||
ns := rbd.NodeServer{
|
||||
DefaultNodeServer: csicommon.NewDefaultNodeServer(d, t, topology),
|
||||
VolumeLocks: util.NewVolumeLocks(),
|
||||
}, nil
|
||||
}
|
||||
ns.SetReadAffinityMapOptions(crushLocationMap)
|
||||
|
||||
return &ns, nil
|
||||
}
|
||||
|
||||
// Run starts a non-blocking gRPC controller, node and identity server for
|
||||
@ -84,9 +92,10 @@ func NewNodeServer(d *csicommon.CSIDriver, t string, topology map[string]string)
|
||||
// This also configures and starts a new CSI-Addons service, by calling
|
||||
// setupCSIAddonsServer().
|
||||
func (r *Driver) Run(conf *util.Config) {
|
||||
var err error
|
||||
var topology map[string]string
|
||||
|
||||
var (
|
||||
err error
|
||||
topology, crushLocationMap map[string]string
|
||||
)
|
||||
// update clone soft and hard limit
|
||||
rbd.SetGlobalInt("rbdHardMaxCloneDepth", conf.RbdHardMaxCloneDepth)
|
||||
rbd.SetGlobalInt("rbdSoftMaxCloneDepth", conf.RbdSoftMaxCloneDepth)
|
||||
@ -128,6 +137,13 @@ func (r *Driver) Run(conf *util.Config) {
|
||||
})
|
||||
}
|
||||
|
||||
if conf.EnableReadAffinity {
|
||||
crushLocationMap, err = util.GetCrushLocationMap(conf.CrushLocationLabels, conf.NodeID)
|
||||
if err != nil {
|
||||
log.FatalLogMsg(err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// Create GRPC servers
|
||||
r.ids = NewIdentityServer(r.cd)
|
||||
|
||||
@ -136,7 +152,7 @@ func (r *Driver) Run(conf *util.Config) {
|
||||
if err != nil {
|
||||
log.FatalLogMsg(err.Error())
|
||||
}
|
||||
r.ns, err = NewNodeServer(r.cd, conf.Vtype, topology)
|
||||
r.ns, err = NewNodeServer(r.cd, conf.Vtype, topology, crushLocationMap)
|
||||
if err != nil {
|
||||
log.FatalLogMsg("failed to start node server, err %v\n", err)
|
||||
}
|
||||
@ -163,17 +179,6 @@ func (r *Driver) Run(conf *util.Config) {
|
||||
"and replaced by CSI-Addons, see https://github.com/ceph/ceph-csi/issues/3314 for more details")
|
||||
r.rs = NewReplicationServer(r.cs)
|
||||
}
|
||||
if !conf.IsControllerServer && !conf.IsNodeServer {
|
||||
topology, err = util.GetTopologyFromDomainLabels(conf.DomainLabels, conf.NodeID, conf.DriverName)
|
||||
if err != nil {
|
||||
log.FatalLogMsg(err.Error())
|
||||
}
|
||||
r.ns, err = NewNodeServer(r.cd, conf.Vtype, topology)
|
||||
if err != nil {
|
||||
log.FatalLogMsg("failed to start node server, err %v\n", err)
|
||||
}
|
||||
r.cs = NewControllerServer(r.cd)
|
||||
}
|
||||
|
||||
s := csicommon.NewNonBlockingGRPCServer()
|
||||
srv := csicommon.Servers{
|
||||
|
@ -45,6 +45,8 @@ type NodeServer struct {
|
||||
// A map storing all volumes with ongoing operations so that additional operations
|
||||
// for that same volume (as defined by VolumeID) return an Aborted error
|
||||
VolumeLocks *util.VolumeLocks
|
||||
// readAffinityMapOptions contains map options to enable read affinity.
|
||||
readAffinityMapOptions string
|
||||
}
|
||||
|
||||
// stageTransaction struct represents the state a transaction was when it either completed
|
||||
@ -143,7 +145,7 @@ func healerStageTransaction(ctx context.Context, cr *util.Credentials, volOps *r
|
||||
// this function also receive the credentials and secrets args as it differs in its data.
|
||||
// The credentials are used directly by functions like voljournal.Connect() and other functions
|
||||
// like genVolFromVolumeOptions() make use of secrets.
|
||||
func populateRbdVol(
|
||||
func (ns *NodeServer) populateRbdVol(
|
||||
ctx context.Context,
|
||||
req *csi.NodeStageVolumeRequest,
|
||||
cr *util.Credentials,
|
||||
@ -250,6 +252,7 @@ func populateRbdVol(
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ns.appendReadAffinityMapOptions(rv)
|
||||
|
||||
rv.VolID = volID
|
||||
|
||||
@ -265,6 +268,19 @@ func populateRbdVol(
|
||||
return rv, err
|
||||
}
|
||||
|
||||
// appendReadAffinityMapOptions appends readAffinityMapOptions to mapOptions
|
||||
// if mounter is rbdDefaultMounter and readAffinityMapOptions is not empty.
|
||||
func (ns NodeServer) appendReadAffinityMapOptions(rv *rbdVolume) {
|
||||
switch {
|
||||
case ns.readAffinityMapOptions == "" || rv.Mounter != rbdDefaultMounter:
|
||||
return
|
||||
case rv.MapOptions != "":
|
||||
rv.MapOptions += "," + ns.readAffinityMapOptions
|
||||
default:
|
||||
rv.MapOptions = ns.readAffinityMapOptions
|
||||
}
|
||||
}
|
||||
|
||||
// NodeStageVolume mounts the volume to a staging path on the node.
|
||||
// Implementation notes:
|
||||
// - stagingTargetPath is the directory passed in the request where the volume needs to be staged
|
||||
@ -318,7 +334,7 @@ func (ns *NodeServer) NodeStageVolume(
|
||||
}
|
||||
|
||||
isStaticVol := parseBoolOption(ctx, req.GetVolumeContext(), staticVol, false)
|
||||
rv, err := populateRbdVol(ctx, req, cr)
|
||||
rv, err := ns.populateRbdVol(ctx, req, cr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -1349,3 +1365,22 @@ func getDeviceSize(ctx context.Context, devicePath string) (uint64, error) {
|
||||
|
||||
return size, nil
|
||||
}
|
||||
|
||||
func (ns *NodeServer) SetReadAffinityMapOptions(crushLocationMap map[string]string) {
|
||||
if len(crushLocationMap) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
b.WriteString("read_from_replica=localize,crush_location=")
|
||||
first := true
|
||||
for key, val := range crushLocationMap {
|
||||
if first {
|
||||
b.WriteString(fmt.Sprintf("%s:%s", key, val))
|
||||
first = false
|
||||
} else {
|
||||
b.WriteString(fmt.Sprintf("|%s:%s", key, val))
|
||||
}
|
||||
}
|
||||
ns.readAffinityMapOptions = b.String()
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/container-storage-interface/spec/lib/go/csi"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestGetStagingPath(t *testing.T) {
|
||||
@ -105,3 +106,141 @@ func TestParseBoolOption(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNodeServer_SetReadAffinityMapOptions(t *testing.T) {
|
||||
t.Parallel()
|
||||
tests := []struct {
|
||||
name string
|
||||
crushLocationmap map[string]string
|
||||
wantAny []string
|
||||
}{
|
||||
{
|
||||
name: "nil crushLocationmap",
|
||||
crushLocationmap: nil,
|
||||
wantAny: []string{""},
|
||||
},
|
||||
{
|
||||
name: "empty crushLocationmap",
|
||||
crushLocationmap: map[string]string{},
|
||||
wantAny: []string{""},
|
||||
},
|
||||
{
|
||||
name: "single entry in crushLocationmap",
|
||||
crushLocationmap: map[string]string{
|
||||
"region": "east",
|
||||
},
|
||||
wantAny: []string{"read_from_replica=localize,crush_location=region:east"},
|
||||
},
|
||||
{
|
||||
name: "multiple entries in crushLocationmap",
|
||||
crushLocationmap: map[string]string{
|
||||
"region": "east",
|
||||
"zone": "east-1",
|
||||
},
|
||||
wantAny: []string{
|
||||
"read_from_replica=localize,crush_location=region:east|zone:east-1",
|
||||
"read_from_replica=localize,crush_location=zone:east-1|region:east",
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
currentTT := tt
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ns := &NodeServer{}
|
||||
ns.SetReadAffinityMapOptions(currentTT.crushLocationmap)
|
||||
assert.Contains(t, currentTT.wantAny, ns.readAffinityMapOptions)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNodeServer_appendReadAffinityMapOptions(t *testing.T) {
|
||||
t.Parallel()
|
||||
type input struct {
|
||||
mapOptions, readAffinityMapOptions, mounter string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args input
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "both empty mapOptions and crushLocationMap",
|
||||
args: input{
|
||||
mapOptions: "",
|
||||
readAffinityMapOptions: "",
|
||||
mounter: rbdDefaultMounter,
|
||||
},
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "empty mapOptions, filled crushLocationMap & default mounter",
|
||||
args: input{
|
||||
mapOptions: "",
|
||||
readAffinityMapOptions: "read_from_replica=localize,crush_location=region:west",
|
||||
mounter: rbdDefaultMounter,
|
||||
},
|
||||
want: "read_from_replica=localize,crush_location=region:west",
|
||||
},
|
||||
{
|
||||
name: "empty mapOptions, filled crushLocationMap & non-default mounter",
|
||||
args: input{
|
||||
mapOptions: "",
|
||||
readAffinityMapOptions: "read_from_replica=localize,crush_location=region:west",
|
||||
mounter: rbdNbdMounter,
|
||||
},
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "filled mapOptions, filled crushLocationMap & default mounter",
|
||||
args: input{
|
||||
mapOptions: "notrim",
|
||||
readAffinityMapOptions: "read_from_replica=localize,crush_location=region:west",
|
||||
mounter: rbdDefaultMounter,
|
||||
},
|
||||
want: "notrim,read_from_replica=localize,crush_location=region:west",
|
||||
},
|
||||
{
|
||||
name: "filled mapOptions, filled crushLocationMap & non-default mounter",
|
||||
args: input{
|
||||
mapOptions: "notrim",
|
||||
readAffinityMapOptions: "read_from_replica=localize,crush_location=region:west",
|
||||
mounter: rbdNbdMounter,
|
||||
},
|
||||
want: "notrim",
|
||||
},
|
||||
{
|
||||
name: "filled mapOptions, empty readAffinityMapOptions & default mounter",
|
||||
args: input{
|
||||
mapOptions: "notrim",
|
||||
readAffinityMapOptions: "",
|
||||
mounter: rbdDefaultMounter,
|
||||
},
|
||||
want: "notrim",
|
||||
},
|
||||
{
|
||||
name: "filled mapOptions, empty readAffinityMapOptions & non-default mounter",
|
||||
args: input{
|
||||
mapOptions: "notrim",
|
||||
readAffinityMapOptions: "",
|
||||
mounter: rbdNbdMounter,
|
||||
},
|
||||
want: "notrim",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
currentTT := tt
|
||||
t.Run(currentTT.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
rv := &rbdVolume{
|
||||
MapOptions: currentTT.args.mapOptions,
|
||||
Mounter: currentTT.args.mounter,
|
||||
}
|
||||
ns := &NodeServer{
|
||||
readAffinityMapOptions: currentTT.args.readAffinityMapOptions,
|
||||
}
|
||||
ns.appendReadAffinityMapOptions(rv)
|
||||
assert.Equal(t, currentTT.want, rv.MapOptions)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user