Merge pull request #373 from black-dragon74/sync-ds-4.17

Bug 2308091: [release-4.17] cephfs: Fix Removal of IPs from blocklist
This commit is contained in:
openshift-merge-bot[bot] 2024-09-09 14:18:00 +00:00 committed by GitHub
commit ffd92a4d9e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 258 additions and 34 deletions

View File

@ -22,8 +22,8 @@ curl -X GET http://10.109.65.142:8080/metrics 2>/dev/null | grep csi
csi_liveness 1 csi_liveness 1
``` ```
Promethues can be deployed through the promethues operator described [here](https://coreos.com/operators/prometheus/docs/latest/user-guides/getting-started.html). Prometheus can be deployed through the prometheus operator described [here](https://coreos.com/operators/prometheus/docs/latest/user-guides/getting-started.html).
The [service-monitor](../deploy/service-monitor.yaml) will tell promethues how The [service-monitor](../deploy/service-monitor.yaml) will tell prometheus how
to pull metrics out of CSI. to pull metrics out of CSI.
Each CSI pod has a service to expose the endpoint to prometheus. By default, rbd Each CSI pod has a service to expose the endpoint to prometheus. By default, rbd

View File

@ -125,7 +125,7 @@ parameters:
# "file": Enable file encryption on the mounted filesystem # "file": Enable file encryption on the mounted filesystem
# "block": Encrypt RBD block device # "block": Encrypt RBD block device
# When unspecified assume type "block". "file" and "block" are # When unspecified assume type "block". "file" and "block" are
# mutally exclusive. # mutually exclusive.
# encryptionType: "block" # encryptionType: "block"
# (optional) Use external key management system for encryption passphrases by # (optional) Use external key management system for encryption passphrases by

View File

@ -111,7 +111,7 @@ func (fcs *FenceControllerServer) UnfenceClusterNetwork(
return nil, status.Error(codes.Internal, err.Error()) return nil, status.Error(codes.Internal, err.Error())
} }
err = nwFence.RemoveNetworkFence(ctx) err = nwFence.RemoveClientEviction(ctx)
if err != nil { if err != nil {
return nil, status.Errorf(codes.Internal, "failed to unfence CIDR block %q: %s", nwFence.Cidr, err.Error()) return nil, status.Errorf(codes.Internal, "failed to unfence CIDR block %q: %s", nwFence.Cidr, err.Error())
} }

View File

@ -52,6 +52,13 @@ type activeClient struct {
Inst string `json:"inst"` Inst string `json:"inst"`
} }
// IPWithNonce represents the structure of an IP with nonce
// as listed by Ceph OSD blocklist.
//
// Values of this type are produced from blocklist entries shaped like
// "<ip>:<port>/<nonce>"; the port is not kept.
type IPWithNonce struct {
	// IP is the address part of the entry, i.e. the text before the last ':'
	// that precedes the '/'.
	IP string `json:"ip"`
	// Nonce is the text that follows the first '/' of the entry.
	Nonce string `json:"nonce"`
}
// NewNetworkFence returns a networkFence struct object from the Network fence/unfence request. // NewNetworkFence returns a networkFence struct object from the Network fence/unfence request.
func NewNetworkFence( func NewNetworkFence(
ctx context.Context, ctx context.Context,
@ -256,9 +263,7 @@ func (ac *activeClient) fetchID() (int, error) {
} }
// AddClientEviction blocks access for all the IPs in the CIDR block // AddClientEviction blocks access for all the IPs in the CIDR block
// using client eviction. // using client eviction, it also blocks the entire CIDR.
// blocks the active clients listed in cidr, and the IPs
// for whom there is no active client present too.
func (nf *NetworkFence) AddClientEviction(ctx context.Context) error { func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
evictedIPs := make(map[string]bool) evictedIPs := make(map[string]bool)
// fetch active clients // fetch active clients
@ -269,13 +274,15 @@ func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
// iterate through CIDR blocks and check if any active client matches // iterate through CIDR blocks and check if any active client matches
for _, cidr := range nf.Cidr { for _, cidr := range nf.Cidr {
for _, client := range activeClients { for _, client := range activeClients {
clientIP, err := client.fetchIP() var clientIP string
clientIP, err = client.fetchIP()
if err != nil { if err != nil {
return fmt.Errorf("error fetching client IP: %w", err) return fmt.Errorf("error fetching client IP: %w", err)
} }
// check if the clientIP is in the CIDR block // check if the clientIP is in the CIDR block
if isIPInCIDR(ctx, clientIP, cidr) { if isIPInCIDR(ctx, clientIP, cidr) {
clientID, err := client.fetchID() var clientID int
clientID, err = client.fetchID()
if err != nil { if err != nil {
return fmt.Errorf("error fetching client ID: %w", err) return fmt.Errorf("error fetching client ID: %w", err)
} }
@ -291,26 +298,11 @@ func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
} }
} }
// blocklist the IPs in CIDR without any active clients // add the range based blocklist for CIDR
for _, cidr := range nf.Cidr { err = nf.AddNetworkFence(ctx)
// check if the CIDR is evicted
// fetch the list of IPs from a CIDR block
hosts, err := getIPRange(cidr)
if err != nil {
return fmt.Errorf("failed to convert CIDR block %s to corresponding IP range: %w", cidr, err)
}
// add ceph blocklist for each IP in the range mentioned by the CIDR
for _, host := range hosts {
if evictedIPs[host] {
continue
}
err = nf.addCephBlocklist(ctx, host, false)
if err != nil { if err != nil {
return err return err
} }
}
}
return nil return nil
} }
@ -358,7 +350,8 @@ func GetCIDR(cidrs Cidrs) ([]string, error) {
} }
// removeCephBlocklist removes an IP from ceph osd blocklist. // removeCephBlocklist removes an IP from ceph osd blocklist.
func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip string, useRange bool) error { // the value of nonce is ignored if useRange is true.
func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip, nonce string, useRange bool) error {
arg := []string{ arg := []string{
"--id", nf.cr.ID, "--id", nf.cr.ID,
"--keyfile=" + nf.cr.KeyFile, "--keyfile=" + nf.cr.KeyFile,
@ -368,7 +361,15 @@ func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip string, useR
if useRange { if useRange {
cmd = append(cmd, "range") cmd = append(cmd, "range")
} }
// If nonce is not empty and we are not using
// range based blocks, we need to add the nonce
if nonce != "" && !useRange {
cmd = append(cmd, "rm", fmt.Sprintf("%s:0/%s", ip, nonce))
} else {
cmd = append(cmd, "rm", ip) cmd = append(cmd, "rm", ip)
}
cmd = append(cmd, arg...) cmd = append(cmd, arg...)
_, stdErr, err := util.ExecCommand(ctx, "ceph", cmd...) _, stdErr, err := util.ExecCommand(ctx, "ceph", cmd...)
@ -396,7 +397,7 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {
// try range blocklist cmd, if invalid fallback to // try range blocklist cmd, if invalid fallback to
// iterating through IP range. // iterating through IP range.
if hasBlocklistRangeSupport { if hasBlocklistRangeSupport {
err := nf.removeCephBlocklist(ctx, cidr, true) err := nf.removeCephBlocklist(ctx, cidr, "", true)
if err == nil { if err == nil {
continue continue
} }
@ -412,7 +413,11 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {
} }
// remove ceph blocklist for each IP in the range mentioned by the CIDR // remove ceph blocklist for each IP in the range mentioned by the CIDR
for _, host := range hosts { for _, host := range hosts {
err := nf.removeCephBlocklist(ctx, host, false) // 0 is used as nonce here to tell ceph
// to remove the blocklist entry matching: <host>:0/0
// it is same as telling ceph to remove just the IP
// without specifying any port or nonce with it.
err := nf.removeCephBlocklist(ctx, host, "0", false)
if err != nil { if err != nil {
return err return err
} }
@ -421,3 +426,103 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {
return nil return nil
} }
// RemoveClientEviction reverts the fencing of every CIDR block held by the
// NetworkFence.
//
// It first calls RemoveNetworkFence to drop the CIDR block entries, then
// fetches the current Ceph OSD blocklist and removes, one by one, every
// listed entry (IP together with its nonce) whose IP lies within one of the
// CIDR blocks. The first error encountered is returned unchanged and stops
// the processing.
func (nf *NetworkFence) RemoveClientEviction(ctx context.Context) error {
	// The CIDR block itself goes first.
	if err := nf.RemoveNetworkFence(ctx); err != nil {
		return err
	}

	// A single listing of the blocklist is reused for all CIDR blocks.
	rawBlocklist, err := nf.getCephBlocklist(ctx)
	if err != nil {
		return err
	}

	for _, cidr := range nf.Cidr {
		// Entries of the blocklist that fall under this CIDR, with nonce.
		blocked := nf.parseBlocklistForCIDR(ctx, rawBlocklist, cidr)
		log.DebugLog(ctx, "parsed blocklist for CIDR %s: %+v", cidr, blocked)

		for i := range blocked {
			rmErr := nf.removeCephBlocklist(ctx, blocked[i].IP, blocked[i].Nonce, false)
			if rmErr != nil {
				return rmErr
			}
		}
	}

	return nil
}
// getCephBlocklist runs "ceph osd blocklist ls" with the credentials and
// monitors of the NetworkFence and returns the command's stdout as-is.
// The command is given a two minute timeout; on failure the returned error
// wraps the execution error and includes the captured stderr.
func (nf *NetworkFence) getCephBlocklist(ctx context.Context) (string, error) {
	// FIXME: replace the ceph command with go-ceph API in future
	args := []string{
		"osd", "blocklist", "ls",
		"--id", nf.cr.ID,
		"--keyfile=" + nf.cr.KeyFile,
		"-m", nf.Monitors,
	}

	out, errOut, err := util.ExecCommandWithTimeout(ctx, 2*time.Minute, "ceph", args...)
	if err != nil {
		return "", fmt.Errorf("failed to get the ceph blocklist: %w, stderr: %q", err, errOut)
	}

	return out, nil
}
// parseBlocklistEntry parses a single entry from the ceph blocklist
// and returns the IPWithNonce.
//
// Only the first whitespace-separated field of entry is looked at; it is
// expected to have the shape "<ip>:<port>/<nonce>". The nonce is everything
// after the first '/', and the IP is everything before the last ':' that
// precedes that '/', so an IPv6 address written without brackets is kept
// whole while the port is dropped.
//
// The zero IPWithNonce is returned when entry is blank, when its first field
// has no '/', or when there is no ':' in front of the '/'.
//
// NOTE(review): no bracket handling is done here; if Ceph lists IPv6
// addresses as "[addr]:port/nonce" the brackets stay part of IP — confirm
// against real "ceph osd blocklist ls" output.
func (nf *NetworkFence) parseBlocklistEntry(entry string) IPWithNonce {
	fields := strings.Fields(entry)
	if len(fields) == 0 {
		return IPWithNonce{}
	}

	// Separate "<ip>:<port>" from "<nonce>" at the first '/'.
	slash := strings.Index(fields[0], "/")
	if slash == -1 {
		return IPWithNonce{}
	}
	ipPort, nonce := fields[0][:slash], fields[0][slash+1:]

	// The port follows the last ':'; earlier colons belong to the address.
	// LastIndex is always smaller than len(ipPort), so slicing is safe
	// without any further length check.
	lastColon := strings.LastIndex(ipPort, ":")
	if lastColon == -1 {
		return IPWithNonce{}
	}

	return IPWithNonce{IP: ipPort[:lastColon], Nonce: nonce}
}
// parseBlocklistForCIDR walks the newline-separated blocklist and collects
// every entry whose IP lies within cidr, together with the entry's nonce.
//
// Lines are trimmed before inspection. A line is ignored when it contains
// "cidr" (a ranged block) or contains no '/' at all. The returned slice is
// never nil; it is empty when nothing matches.
func (nf *NetworkFence) parseBlocklistForCIDR(ctx context.Context, blocklist, cidr string) []IPWithNonce {
	// Start from an empty, non-nil slice so "no match" is not reported as nil.
	inCIDR := []IPWithNonce{}

	for _, line := range strings.Split(blocklist, "\n") {
		line = strings.TrimSpace(line)

		// Skip unrelated ranged blocks and invalid entries.
		isRanged := strings.Contains(line, "cidr")
		hasSlash := strings.Contains(line, "/")
		if isRanged || !hasSlash {
			continue
		}

		candidate := nf.parseBlocklistEntry(line)
		if !isIPInCIDR(ctx, candidate.IP, cidr) {
			continue
		}
		inCIDR = append(inCIDR, candidate)
	}

	return inCIDR
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package networkfence package networkfence
import ( import (
"context"
"testing" "testing"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -138,3 +139,121 @@ func TestFetchID(t *testing.T) {
}) })
} }
} }
// TestParseBlocklistEntry is a table-driven test for parseBlocklistEntry.
// Each case feeds one raw blocklist entry to the parser and compares the
// returned IPWithNonce with the expected value; an empty IPWithNonce is the
// expected result for an entry that cannot be parsed.
func TestParseBlocklistEntry(t *testing.T) {
	t.Parallel()
	tests := []struct {
		name string
		input string
		expected IPWithNonce
	}{
		{
			name: "Valid IP and nonce",
			input: "192.168.1.1:6789/abcdef123456",
			expected: IPWithNonce{IP: "192.168.1.1", Nonce: "abcdef123456"},
		},
		// The IPv6 cases below check that only the last ':'-separated
		// component (the port) is removed from the address.
		{
			name: "IPv6 address with full notation",
			input: "2001:0db8:0000:0000:0000:8a2e:0370:7334:6789/abc123",
			expected: IPWithNonce{IP: "2001:0db8:0000:0000:0000:8a2e:0370:7334", Nonce: "abc123"},
		},
		{
			name: "IPv6 address with compressed zeros",
			input: "2001:db8::1428:57ab:6789/def456",
			expected: IPWithNonce{IP: "2001:db8::1428:57ab", Nonce: "def456"},
		},
		{
			name: "IPv6 loopback address",
			input: "::1:6789/ghi789",
			expected: IPWithNonce{IP: "::1", Nonce: "ghi789"},
		},
		{
			name: "IPv6 address with IPv4 mapping",
			input: "::ffff:192.0.2.128:6789/jkl012",
			expected: IPWithNonce{IP: "::ffff:192.0.2.128", Nonce: "jkl012"},
		},
		// No ':' in front of the '/', so the entry is rejected.
		{
			name: "IP without port",
			input: "10.0.0.1/nonce123",
			expected: IPWithNonce{},
		},
		// Surrounding whitespace and trailing fields must be ignored.
		{
			name: "Extra whitespace",
			input: " 172.16.0.1:1234/abc123 extra info ",
			expected: IPWithNonce{IP: "172.16.0.1", Nonce: "abc123"},
		},
	}
	// The zero NetworkFence is used as the receiver for every case.
	nf := &NetworkFence{}
	// NOTE(review): the parallel subtests close over tt; with a module Go
	// version older than 1.22 every subtest would observe the last case
	// unless tt is copied per iteration — confirm the go directive in go.mod.
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()
			result := nf.parseBlocklistEntry(tt.input)
			require.Equal(t, tt.expected, result)
		})
	}
}
// TestParseBlocklistForCIDR is a table-driven test for parseBlocklistForCIDR.
// Each case supplies a multi-line blocklist text (ending with a
// "listed N entries" summary line) and a CIDR, and compares the returned
// entries with the expected ones. Cases without a match expect an empty,
// non-nil slice.
func TestParseBlocklistForCIDR(t *testing.T) {
	t.Parallel()
	testCases := []struct {
		name string
		blocklist string
		cidr string
		expected []IPWithNonce
	}{
		{
			name: "Single IPv4 in CIDR",
			blocklist: `192.168.1.1:0/1234567 expires 2023-07-01 10:00:00.000000
listed 1 entries`,
			cidr: "192.168.1.0/24",
			expected: []IPWithNonce{{IP: "192.168.1.1", Nonce: "1234567"}},
		},
		// 192.168.2.1 lies outside 192.168.1.0/24 and must be left out.
		{
			name: "Multiple IPv4 in CIDR",
			blocklist: `192.168.1.1:0/1234567 expires 2023-07-01 10:00:00.000000
192.168.1.2:0/7654321 expires 2023-07-01 11:00:00.000000
192.168.2.1:0/abcdefg expires 2023-07-01 12:00:00.000000
listed 3 entries`,
			cidr: "192.168.1.0/24",
			expected: []IPWithNonce{
				{IP: "192.168.1.1", Nonce: "1234567"},
				{IP: "192.168.1.2", Nonce: "7654321"},
			},
		},
		{
			name: "IPv6 in CIDR",
			blocklist: `2001:db8::1:0/fedcba expires 2023-07-01 10:00:00.000000
2001:db8::2:0/abcdef expires 2023-07-01 11:00:00.000000
listed 2 entries`,
			cidr: "2001:db8::/64",
			expected: []IPWithNonce{{IP: "2001:db8::1", Nonce: "fedcba"}, {IP: "2001:db8::2", Nonce: "abcdef"}},
		},
		// Only the summary line is present; it has no '/' and is skipped.
		{
			name: "Empty blocklist",
			blocklist: `listed 0 entries`,
			cidr: "192.168.1.0/24",
			expected: []IPWithNonce{},
		},
		{
			name: "No matching IPs",
			blocklist: `10.0.0.1:0/1234567 expires 2023-07-01 10:00:00.000000
listed 1 entries`,
			cidr: "192.168.1.0/24",
			expected: []IPWithNonce{},
		},
	}
	// The zero NetworkFence is used as the receiver for every case.
	nf := &NetworkFence{}
	// NOTE(review): the parallel subtests close over tc; with a module Go
	// version older than 1.22 every subtest would observe the last case
	// unless tc is copied per iteration — confirm the go directive in go.mod.
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			result := nf.parseBlocklistForCIDR(context.TODO(), tc.blocklist, tc.cidr)
			require.Equal(t, tc.expected, result)
		})
	}
}

View File

@ -37,7 +37,7 @@ type checker struct {
// timeout contains the delay (interval + timeout) // timeout contains the delay (interval + timeout)
timeout time.Duration timeout time.Duration
// mutex protects against concurrent access to healty, err and // mutex protects against concurrent access to healthy, err and
// lastUpdate // lastUpdate
mutex *sync.RWMutex mutex *sync.RWMutex

View File

@ -60,7 +60,7 @@ func Test_getCrushLocationMap(t *testing.T) {
want: map[string]string{"zone": "zone1"}, want: map[string]string{"zone": "zone1"},
}, },
{ {
name: "multuple matching crushlocation and node labels", name: "multiple matching crushlocation and node labels",
args: input{ args: input{
crushLocationLabels: "topology.io/zone,topology.io/rack", crushLocationLabels: "topology.io/zone,topology.io/rack",
nodeLabels: map[string]string{ nodeLabels: map[string]string{