Merge pull request #373 from black-dragon74/sync-ds-4.17

Bug 2308091: [release-4.17] cephfs: Fix Removal of IPs from blocklist
This commit is contained in:
openshift-merge-bot[bot] 2024-09-09 14:18:00 +00:00 committed by GitHub
commit ffd92a4d9e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 258 additions and 34 deletions

View File

@ -22,8 +22,8 @@ curl -X GET http://10.109.65.142:8080/metrics 2>/dev/null | grep csi
csi_liveness 1
```
Promethues can be deployed through the promethues operator described [here](https://coreos.com/operators/prometheus/docs/latest/user-guides/getting-started.html).
The [service-monitor](../deploy/service-monitor.yaml) will tell promethues how
Prometheus can be deployed through the prometheus operator described [here](https://coreos.com/operators/prometheus/docs/latest/user-guides/getting-started.html).
The [service-monitor](../deploy/service-monitor.yaml) will tell prometheus how
to pull metrics out of CSI.
Each CSI pod has a service to expose the endpoint to prometheus. By default, rbd

View File

@ -125,7 +125,7 @@ parameters:
# "file": Enable file encryption on the mounted filesystem
# "block": Encrypt RBD block device
# When unspecified assume type "block". "file" and "block" are
# mutally exclusive.
# mutually exclusive.
# encryptionType: "block"
# (optional) Use external key management system for encryption passphrases by

View File

@ -111,7 +111,7 @@ func (fcs *FenceControllerServer) UnfenceClusterNetwork(
return nil, status.Error(codes.Internal, err.Error())
}
err = nwFence.RemoveNetworkFence(ctx)
err = nwFence.RemoveClientEviction(ctx)
if err != nil {
return nil, status.Errorf(codes.Internal, "failed to unfence CIDR block %q: %s", nwFence.Cidr, err.Error())
}

View File

@ -52,6 +52,13 @@ type activeClient struct {
Inst string `json:"inst"`
}
// IPWithNonce represents the structure of an IP with nonce
// as listed by Ceph OSD blocklist.
type IPWithNonce struct {
IP string `json:"ip"`
Nonce string `json:"nonce"`
}
// NewNetworkFence returns a networkFence struct object from the Network fence/unfence request.
func NewNetworkFence(
ctx context.Context,
@ -256,9 +263,7 @@ func (ac *activeClient) fetchID() (int, error) {
}
// AddClientEviction blocks access for all the IPs in the CIDR block
// using client eviction.
// blocks the active clients listed in cidr, and the IPs
// for whom there is no active client present too.
// using client eviction, it also blocks the entire CIDR.
func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
evictedIPs := make(map[string]bool)
// fetch active clients
@ -269,13 +274,15 @@ func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
// iterate through CIDR blocks and check if any active client matches
for _, cidr := range nf.Cidr {
for _, client := range activeClients {
clientIP, err := client.fetchIP()
var clientIP string
clientIP, err = client.fetchIP()
if err != nil {
return fmt.Errorf("error fetching client IP: %w", err)
}
// check if the clientIP is in the CIDR block
if isIPInCIDR(ctx, clientIP, cidr) {
clientID, err := client.fetchID()
var clientID int
clientID, err = client.fetchID()
if err != nil {
return fmt.Errorf("error fetching client ID: %w", err)
}
@ -291,26 +298,11 @@ func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
}
}
// blocklist the IPs in CIDR without any active clients
for _, cidr := range nf.Cidr {
// check if the CIDR is evicted
// fetch the list of IPs from a CIDR block
hosts, err := getIPRange(cidr)
if err != nil {
return fmt.Errorf("failed to convert CIDR block %s to corresponding IP range: %w", cidr, err)
}
// add ceph blocklist for each IP in the range mentioned by the CIDR
for _, host := range hosts {
if evictedIPs[host] {
continue
}
err = nf.addCephBlocklist(ctx, host, false)
// add the range based blocklist for CIDR
err = nf.AddNetworkFence(ctx)
if err != nil {
return err
}
}
}
return nil
}
@ -358,7 +350,8 @@ func GetCIDR(cidrs Cidrs) ([]string, error) {
}
// removeCephBlocklist removes an IP from ceph osd blocklist.
func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip string, useRange bool) error {
// the value of nonce is ignored if useRange is true.
func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip, nonce string, useRange bool) error {
arg := []string{
"--id", nf.cr.ID,
"--keyfile=" + nf.cr.KeyFile,
@ -368,7 +361,15 @@ func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip string, useR
if useRange {
cmd = append(cmd, "range")
}
// If nonce is not empty and we are not using
// range based blocks, we need to add the nonce
if nonce != "" && !useRange {
cmd = append(cmd, "rm", fmt.Sprintf("%s:0/%s", ip, nonce))
} else {
cmd = append(cmd, "rm", ip)
}
cmd = append(cmd, arg...)
_, stdErr, err := util.ExecCommand(ctx, "ceph", cmd...)
@ -396,7 +397,7 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {
// try range blocklist cmd, if invalid fallback to
// iterating through IP range.
if hasBlocklistRangeSupport {
err := nf.removeCephBlocklist(ctx, cidr, true)
err := nf.removeCephBlocklist(ctx, cidr, "", true)
if err == nil {
continue
}
@ -412,7 +413,11 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {
}
// remove ceph blocklist for each IP in the range mentioned by the CIDR
for _, host := range hosts {
err := nf.removeCephBlocklist(ctx, host, false)
// 0 is used as nonce here to tell ceph
// to remove the blocklist entry matching: <host>:0/0
// it is same as telling ceph to remove just the IP
// without specifying any port or nonce with it.
err := nf.removeCephBlocklist(ctx, host, "0", false)
if err != nil {
return err
}
@ -421,3 +426,103 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {
return nil
}
func (nf *NetworkFence) RemoveClientEviction(ctx context.Context) error {
// Remove the CIDR block first
err := nf.RemoveNetworkFence(ctx)
if err != nil {
return err
}
// Get the ceph blocklist
blocklist, err := nf.getCephBlocklist(ctx)
if err != nil {
return err
}
// For each CIDR block, remove the IPs in the blocklist
// that fall under the CIDR with nonce
for _, cidr := range nf.Cidr {
hosts := nf.parseBlocklistForCIDR(ctx, blocklist, cidr)
log.DebugLog(ctx, "parsed blocklist for CIDR %s: %+v", cidr, hosts)
for _, host := range hosts {
err := nf.removeCephBlocklist(ctx, host.IP, host.Nonce, false)
if err != nil {
return err
}
}
}
return nil
}
// getCephBlocklist fetches the ceph blocklist and returns it as a string.
func (nf *NetworkFence) getCephBlocklist(ctx context.Context) (string, error) {
arg := []string{
"--id", nf.cr.ID,
"--keyfile=" + nf.cr.KeyFile,
"-m", nf.Monitors,
}
// FIXME: replace the ceph command with go-ceph API in future
cmd := []string{"osd", "blocklist", "ls"}
cmd = append(cmd, arg...)
stdout, stdErr, err := util.ExecCommandWithTimeout(ctx, 2*time.Minute, "ceph", cmd...)
if err != nil {
return "", fmt.Errorf("failed to get the ceph blocklist: %w, stderr: %q", err, stdErr)
}
return stdout, nil
}
// parseBlocklistEntry parses a single entry from the ceph blocklist
// and returns the IPWithNonce.
func (nf *NetworkFence) parseBlocklistEntry(entry string) IPWithNonce {
parts := strings.Fields(entry)
if len(parts) == 0 {
return IPWithNonce{}
}
ipPortNonce := strings.SplitN(parts[0], "/", 2)
if len(ipPortNonce) != 2 {
return IPWithNonce{}
}
ipPort := ipPortNonce[0]
nonce := ipPortNonce[1]
lastColonIndex := strings.LastIndex(ipPortNonce[0], ":")
if lastColonIndex == -1 {
return IPWithNonce{}
}
if len(ipPort) <= lastColonIndex {
return IPWithNonce{}
}
ip := ipPort[:lastColonIndex]
return IPWithNonce{IP: ip, Nonce: nonce}
}
// parseBlocklistForCIDR scans the blocklist for the given CIDR and returns
// the list of IPs that lie within the CIDR along with their nonce.
func (nf *NetworkFence) parseBlocklistForCIDR(ctx context.Context, blocklist, cidr string) []IPWithNonce {
blocklistEntries := strings.Split(blocklist, "\n")
matchingHosts := make([]IPWithNonce, 0)
for _, entry := range blocklistEntries {
entry = strings.TrimSpace(entry)
// Skip unrelated ranged blocks and invalid entries
if strings.Contains(entry, "cidr") || !strings.Contains(entry, "/") {
continue
}
blockedHost := nf.parseBlocklistEntry(entry)
if isIPInCIDR(ctx, blockedHost.IP, cidr) {
matchingHosts = append(matchingHosts, blockedHost)
}
}
return matchingHosts
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package networkfence
import (
"context"
"testing"
"github.com/stretchr/testify/require"
@ -138,3 +139,121 @@ func TestFetchID(t *testing.T) {
})
}
}
func TestParseBlocklistEntry(t *testing.T) {
t.Parallel()
tests := []struct {
name string
input string
expected IPWithNonce
}{
{
name: "Valid IP and nonce",
input: "192.168.1.1:6789/abcdef123456",
expected: IPWithNonce{IP: "192.168.1.1", Nonce: "abcdef123456"},
},
{
name: "IPv6 address with full notation",
input: "2001:0db8:0000:0000:0000:8a2e:0370:7334:6789/abc123",
expected: IPWithNonce{IP: "2001:0db8:0000:0000:0000:8a2e:0370:7334", Nonce: "abc123"},
},
{
name: "IPv6 address with compressed zeros",
input: "2001:db8::1428:57ab:6789/def456",
expected: IPWithNonce{IP: "2001:db8::1428:57ab", Nonce: "def456"},
},
{
name: "IPv6 loopback address",
input: "::1:6789/ghi789",
expected: IPWithNonce{IP: "::1", Nonce: "ghi789"},
},
{
name: "IPv6 address with IPv4 mapping",
input: "::ffff:192.0.2.128:6789/jkl012",
expected: IPWithNonce{IP: "::ffff:192.0.2.128", Nonce: "jkl012"},
},
{
name: "IP without port",
input: "10.0.0.1/nonce123",
expected: IPWithNonce{},
},
{
name: "Extra whitespace",
input: " 172.16.0.1:1234/abc123 extra info ",
expected: IPWithNonce{IP: "172.16.0.1", Nonce: "abc123"},
},
}
nf := &NetworkFence{}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
result := nf.parseBlocklistEntry(tt.input)
require.Equal(t, tt.expected, result)
})
}
}
func TestParseBlocklistForCIDR(t *testing.T) {
t.Parallel()
testCases := []struct {
name string
blocklist string
cidr string
expected []IPWithNonce
}{
{
name: "Single IPv4 in CIDR",
blocklist: `192.168.1.1:0/1234567 expires 2023-07-01 10:00:00.000000
listed 1 entries`,
cidr: "192.168.1.0/24",
expected: []IPWithNonce{{IP: "192.168.1.1", Nonce: "1234567"}},
},
{
name: "Multiple IPv4 in CIDR",
blocklist: `192.168.1.1:0/1234567 expires 2023-07-01 10:00:00.000000
192.168.1.2:0/7654321 expires 2023-07-01 11:00:00.000000
192.168.2.1:0/abcdefg expires 2023-07-01 12:00:00.000000
listed 3 entries`,
cidr: "192.168.1.0/24",
expected: []IPWithNonce{
{IP: "192.168.1.1", Nonce: "1234567"},
{IP: "192.168.1.2", Nonce: "7654321"},
},
},
{
name: "IPv6 in CIDR",
blocklist: `2001:db8::1:0/fedcba expires 2023-07-01 10:00:00.000000
2001:db8::2:0/abcdef expires 2023-07-01 11:00:00.000000
listed 2 entries`,
cidr: "2001:db8::/64",
expected: []IPWithNonce{{IP: "2001:db8::1", Nonce: "fedcba"}, {IP: "2001:db8::2", Nonce: "abcdef"}},
},
{
name: "Empty blocklist",
blocklist: `listed 0 entries`,
cidr: "192.168.1.0/24",
expected: []IPWithNonce{},
},
{
name: "No matching IPs",
blocklist: `10.0.0.1:0/1234567 expires 2023-07-01 10:00:00.000000
listed 1 entries`,
cidr: "192.168.1.0/24",
expected: []IPWithNonce{},
},
}
nf := &NetworkFence{}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
result := nf.parseBlocklistForCIDR(context.TODO(), tc.blocklist, tc.cidr)
require.Equal(t, tc.expected, result)
})
}
}

View File

@ -37,7 +37,7 @@ type checker struct {
// timeout contains the delay (interval + timeout)
timeout time.Duration
// mutex protects against concurrent access to healty, err and
// mutex protects against concurrent access to healthy, err and
// lastUpdate
mutex *sync.RWMutex

View File

@ -60,7 +60,7 @@ func Test_getCrushLocationMap(t *testing.T) {
want: map[string]string{"zone": "zone1"},
},
{
name: "multuple matching crushlocation and node labels",
name: "multiple matching crushlocation and node labels",
args: input{
crushLocationLabels: "topology.io/zone,topology.io/rack",
nodeLabels: map[string]string{