cephfs: implement the logic for unfencing procedure

This commit un-blocklists the clients whose addresses fall within the
provided CIDR blocks as part of the unfencing operation.

Signed-off-by: Riya Singhal <rsinghal@redhat.com>
This commit is contained in:
Riya Singhal 2023-09-29 01:26:05 +05:30 committed by mergify[bot]
parent 1420ad193a
commit 14b06837d0
2 changed files with 55 additions and 7 deletions

View File

@ -89,3 +89,32 @@ func (fcs *FenceControllerServer) FenceClusterNetwork(
return &fence.FenceClusterNetworkResponse{}, nil return &fence.FenceClusterNetworkResponse{}, nil
} }
// UnfenceClusterNetwork unblocks the access to a CIDR block by removing the network fence.
//
// The request's CIDRs and parameters are validated first, then user
// credentials are built from the request secrets and released again when the
// call returns. A NetworkFence is constructed from those inputs and its
// RemoveNetworkFence method is invoked to undo the fence.
//
// Errors are reported as gRPC statuses:
//   - codes.InvalidArgument when validation or credential creation fails;
//   - codes.Internal when the NetworkFence cannot be created or removed.
func (fcs *FenceControllerServer) UnfenceClusterNetwork(
	ctx context.Context,
	req *fence.UnfenceClusterNetworkRequest,
) (*fence.UnfenceClusterNetworkResponse, error) {
	// Read every request field through its generated getter: the getters
	// tolerate a nil request, whereas direct field access would panic.
	err := validateNetworkFenceReq(req.GetCidrs(), req.GetParameters())
	if err != nil {
		return nil, status.Error(codes.InvalidArgument, err.Error())
	}

	cr, err := util.NewUserCredentials(req.GetSecrets())
	if err != nil {
		return nil, status.Error(codes.InvalidArgument, err.Error())
	}
	// Release the credentials once the request has been served.
	defer cr.DeleteCredentials()

	nwFence, err := nf.NewNetworkFence(ctx, cr, req.GetCidrs(), req.GetParameters())
	if err != nil {
		return nil, status.Error(codes.Internal, err.Error())
	}

	err = nwFence.RemoveNetworkFence(ctx)
	if err != nil {
		return nil, status.Errorf(codes.Internal, "failed to unfence CIDR block %q: %s", nwFence.Cidr, err.Error())
	}

	return &fence.UnfenceClusterNetworkResponse{}, nil
}

View File

@ -36,7 +36,7 @@ const (
blocklistTime = "157784760" blocklistTime = "157784760"
invalidCommandStr = "invalid command" invalidCommandStr = "invalid command"
// we can always use mds rank 0, since all the clients have a session with rank-0. // we can always use mds rank 0, since all the clients have a session with rank-0.
mdsRank = "0" mdsRank = 0
) )
// NetworkFence contains the CIDR blocks to be blocked. // NetworkFence contains the CIDR blocks to be blocked.
@ -145,9 +145,15 @@ func (nf *NetworkFence) AddNetworkFence(ctx context.Context) error {
return nil return nil
} }
func listActiveClients(ctx context.Context) ([]activeClient, error) { func (nf *NetworkFence) listActiveClients(ctx context.Context) ([]activeClient, error) {
arg := []string{
"--id", nf.cr.ID,
"--keyfile=" + nf.cr.KeyFile,
"-m", nf.Monitors,
}
// FIXME: replace the ceph command with go-ceph API in future // FIXME: replace the ceph command with go-ceph API in future
cmd := []string{"tell", fmt.Sprintf("mds.%s", mdsRank), "client", "ls"} cmd := []string{"tell", fmt.Sprintf("mds.%d", mdsRank), "client", "ls"}
cmd = append(cmd, arg...)
stdout, stdErr, err := util.ExecCommandWithTimeout(ctx, 2*time.Minute, "ceph", cmd...) stdout, stdErr, err := util.ExecCommandWithTimeout(ctx, 2*time.Minute, "ceph", cmd...)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to list active clients: %w, stderr: %q", err, stdErr) return nil, fmt.Errorf("failed to list active clients: %w, stderr: %q", err, stdErr)
@ -161,9 +167,15 @@ func listActiveClients(ctx context.Context) ([]activeClient, error) {
return activeClients, nil return activeClients, nil
} }
func evictCephFSClient(ctx context.Context, clientID int) error { func (nf *NetworkFence) evictCephFSClient(ctx context.Context, clientID int) error {
arg := []string{
"--id", nf.cr.ID,
"--keyfile=" + nf.cr.KeyFile,
"-m", nf.Monitors,
}
// FIXME: replace the ceph command with go-ceph API in future // FIXME: replace the ceph command with go-ceph API in future
cmd := []string{"tell", fmt.Sprintf("mds.%s", mdsRank), "client", "evict", fmt.Sprintf("id=%d", clientID)} cmd := []string{"tell", fmt.Sprintf("mds.%d", mdsRank), "client", "evict", fmt.Sprintf("id=%d", clientID)}
cmd = append(cmd, arg...)
_, stdErr, err := util.ExecCommandWithTimeout(ctx, 2*time.Minute, "ceph", cmd...) _, stdErr, err := util.ExecCommandWithTimeout(ctx, 2*time.Minute, "ceph", cmd...)
if err != nil { if err != nil {
return fmt.Errorf("failed to evict client %d: %w, stderr: %q", clientID, err, stdErr) return fmt.Errorf("failed to evict client %d: %w, stderr: %q", clientID, err, stdErr)
@ -233,7 +245,7 @@ func (ac *activeClient) fetchID() (int, error) {
func (nf *NetworkFence) AddClientEviction(ctx context.Context) error { func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
evictedIPs := make(map[string]bool) evictedIPs := make(map[string]bool)
// fetch active clients // fetch active clients
activeClients, err := listActiveClients(ctx) activeClients, err := nf.listActiveClients(ctx)
if err != nil { if err != nil {
return err return err
} }
@ -251,7 +263,7 @@ func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
return fmt.Errorf("error fetching client ID: %w", err) return fmt.Errorf("error fetching client ID: %w", err)
} }
// evict the client // evict the client
err = evictCephFSClient(ctx, clientID) err = nf.evictCephFSClient(ctx, clientID)
if err != nil { if err != nil {
return fmt.Errorf("error evicting client %d: %w", clientID, err) return fmt.Errorf("error evicting client %d: %w", clientID, err)
} }
@ -353,6 +365,13 @@ func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip string, useR
// RemoveNetworkFence unblocks access for all the IPs in the IP range mentioned via the CIDR block // RemoveNetworkFence unblocks access for all the IPs in the IP range mentioned via the CIDR block
// using a network fence. // using a network fence.
// Unfencing one of the protocols (CephFS or RBD) implies that the node is expected to be
// recovered, so both CephFS and RBD are expected to work again too.
// Example:
//   - Create an RBD NetworkFence CR for the single IP 10.10.10.10.
//   - Create a CephFS NetworkFence CR for an IP range that includes the above IP.
//   - Delete the CephFS NetworkFence CR, which un-blocklists the IP.
//   - The IP (10.10.10.10) is now un-blocklisted and can be used by both protocols.
func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error { func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {
hasBlocklistRangeSupport := true hasBlocklistRangeSupport := true
// for each CIDR block, convert it into a range of IPs so as to undo blocklisting operation. // for each CIDR block, convert it into a range of IPs so as to undo blocklisting operation.