ceph-csi/internal/util/idlocker_test.go

/*
Copyright 2019 ceph-csi authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package util

import (
	"testing"
)

// TestIDLocker walks a single ID through the acquire, contended acquire and
// release-then-acquire sequence on a fresh set of volume locks.
func TestIDLocker(t *testing.T) {
	t.Parallel()

	const id = "fake-id"
	volLocks := NewVolumeLocks()

	// the first attempt on an unused ID is expected to succeed
	if acquired := volLocks.TryAcquire(id); !acquired {
		t.Errorf("TryAcquire failed: want (%v), got (%v)", true, acquired)
	}

	// the ID has not been released yet, so a second attempt is expected to
	// be refused
	if acquired := volLocks.TryAcquire(id); acquired {
		t.Errorf("TryAcquire failed: want (%v), got (%v)", false, acquired)
	}

	// after releasing the ID, acquiring it again is expected to succeed
	volLocks.Release(id)
	if acquired := volLocks.TryAcquire(id); !acquired {
		t.Errorf("TryAcquire failed: want (%v), got (%v)", true, acquired)
	}
}

// TestOperationLocks drives the clone, expand, restore, snapshot-create and
// delete lock helpers of a fresh operation lock against a single volume ID.
func TestOperationLocks(t *testing.T) {
	t.Parallel()

	const (
		volumeID = "test-vol"
		repeats  = 3
	)
	opLock := NewOperationLock()

	// with a clone lock held, a request for the expand lock is expected to
	// be rejected
	if err := opLock.GetCloneLock(volumeID); err != nil {
		t.Errorf("failed to acquire clone lock for %s %s", volumeID, err)
	}
	if err := opLock.GetExpandLock(volumeID); err == nil {
		t.Errorf("expected to fail for GetExpandLock for %s", volumeID)
	}
	opLock.ReleaseCloneLock(volumeID)

	// the clone lock is expected to be obtainable several times in a row
	// for the same volume
	for i := 0; i < repeats; i++ {
		if err := opLock.GetCloneLock(volumeID); err != nil {
			t.Errorf("failed to acquire clone lock for %s %s", volumeID, err)
		}
	}
	// hand back every clone lock taken above
	for i := 0; i < repeats; i++ {
		opLock.ReleaseCloneLock(volumeID)
	}

	// one release more than was acquired; the test only passes if this
	// surplus call is harmless
	opLock.ReleaseCloneLock(volumeID)

	// the restore lock is likewise expected to be obtainable several times
	for i := 0; i < repeats; i++ {
		if err := opLock.GetRestoreLock(volumeID); err != nil {
			t.Errorf("failed to acquire restore lock for %s %s", volumeID, err)
		}
	}
	// hand back every restore lock taken above
	for i := 0; i < repeats; i++ {
		opLock.ReleaseRestoreLock(volumeID)
	}

	// with nothing else held, the snapshot-create lock is expected to be
	// available
	if err := opLock.GetSnapshotCreateLock(volumeID); err != nil {
		t.Errorf("failed to acquire createSnapshot lock for %s %s", volumeID, err)
	}
	opLock.ReleaseSnapshotCreateLock(volumeID)

	// and finally the delete lock is expected to be available as well
	if err := opLock.GetDeleteLock(volumeID); err != nil {
		t.Errorf("failed to get GetDeleteLock for %s %v", volumeID, err)
	}
	opLock.ReleaseDeleteLock(volumeID)
}
Move locks to more granular locking than CPU count based As detailed in issue #279, current lock scheme has hash buckets that are count of CPUs. This causes a lot of contention when parallel requests are made to the CSI plugin. To reduce lock contention, this commit introduces granular locks per identifier. The commit also changes the timeout for gRPC requests to Create and Delete volumes, as the current timeout is 10s (kubernetes documentation says 15s but code defaults are 10s). A virtual setup takes about 12-15s to complete a request at times, that leads to unwanted retries of the same request, hence the increased timeout to enable operation completion with minimal retries. Tests to create PVCs before and after these changes look like so, Before: Default master code + sidecar provisioner --timeout option set to 30 seconds 20 PVCs Creation: 3 runs, 396/391/400 seconds Deletion: 3 runs, 218/271/118 seconds - Once was stalled for more than 8 minutes and cancelled the run After: Current commit + sidecar provisioner --timeout option set to 30 sec 20 PVCs Creation: 3 runs, 42/59/65 seconds Deletion: 3 runs, 32/32/31 seconds Fixes: #279 Signed-off-by: ShyamsundarR <srangana@redhat.com> 2019-06-22 16:43:28 +00:00			`/*`
			`Copyright 2019 ceph-csi authors.`

			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License.`
			`*/`

			`package util`

			`import (`
			`"testing"`
			`)`

cleanup: address godot warnings Top level comments should end in a period Signed-off-by: Yug <yuggupta27@gmail.com> 2020-07-19 12:21:03 +00:00			`// very basic tests for the moment.`
Move locks to more granular locking than CPU count based As detailed in issue #279, current lock scheme has hash buckets that are count of CPUs. This causes a lot of contention when parallel requests are made to the CSI plugin. To reduce lock contention, this commit introduces granular locks per identifier. The commit also changes the timeout for gRPC requests to Create and Delete volumes, as the current timeout is 10s (kubernetes documentation says 15s but code defaults are 10s). A virtual setup takes about 12-15s to complete a request at times, that leads to unwanted retries of the same request, hence the increased timeout to enable operation completion with minimal retries. Tests to create PVCs before and after these changes look like so, Before: Default master code + sidecar provisioner --timeout option set to 30 seconds 20 PVCs Creation: 3 runs, 396/391/400 seconds Deletion: 3 runs, 218/271/118 seconds - Once was stalled for more than 8 minutes and cancelled the run After: Current commit + sidecar provisioner --timeout option set to 30 sec 20 PVCs Creation: 3 runs, 42/59/65 seconds Deletion: 3 runs, 32/32/31 seconds Fixes: #279 Signed-off-by: ShyamsundarR <srangana@redhat.com> 2019-06-22 16:43:28 +00:00			`func TestIDLocker(t *testing.T) {`
cleanup: addresses paralleltest linter The Go linter paralleltest checks that the t.Parallel gets called for the test method and for the range of test cases within the test. Updates: #2025 Signed-off-by: Yati Padia <ypadia@redhat.com> 2021-06-02 09:55:53 +00:00			`t.Parallel()`
Change the logic of locking if any on going opearation is seen,we have to return Abort error message Signed-off-by: Madhu Rajanna <madhupr007@gmail.com> 2019-09-12 04:53:37 +00:00			`fakeID := "fake-id"`
			`locks := NewVolumeLocks()`
			`// acquire lock for fake-id`
			`ok := locks.TryAcquire(fakeID)`

			`if !ok {`
			`t.Errorf("TryAcquire failed: want (%v), got (%v)",`
			`true, ok)`
			`}`
Move locks to more granular locking than CPU count based As detailed in issue #279, current lock scheme has hash buckets that are count of CPUs. This causes a lot of contention when parallel requests are made to the CSI plugin. To reduce lock contention, this commit introduces granular locks per identifier. The commit also changes the timeout for gRPC requests to Create and Delete volumes, as the current timeout is 10s (kubernetes documentation says 15s but code defaults are 10s). A virtual setup takes about 12-15s to complete a request at times, that leads to unwanted retries of the same request, hence the increased timeout to enable operation completion with minimal retries. Tests to create PVCs before and after these changes look like so, Before: Default master code + sidecar provisioner --timeout option set to 30 seconds 20 PVCs Creation: 3 runs, 396/391/400 seconds Deletion: 3 runs, 218/271/118 seconds - Once was stalled for more than 8 minutes and cancelled the run After: Current commit + sidecar provisioner --timeout option set to 30 sec 20 PVCs Creation: 3 runs, 42/59/65 seconds Deletion: 3 runs, 32/32/31 seconds Fixes: #279 Signed-off-by: ShyamsundarR <srangana@redhat.com> 2019-06-22 16:43:28 +00:00
Change the logic of locking if any on going opearation is seen,we have to return Abort error message Signed-off-by: Madhu Rajanna <madhupr007@gmail.com> 2019-09-12 04:53:37 +00:00			`// try to acquire lock again for fake-id, as lock is already present`
			`// it should fail`
			`ok = locks.TryAcquire(fakeID)`
Move locks to more granular locking than CPU count based As detailed in issue #279, current lock scheme has hash buckets that are count of CPUs. This causes a lot of contention when parallel requests are made to the CSI plugin. To reduce lock contention, this commit introduces granular locks per identifier. The commit also changes the timeout for gRPC requests to Create and Delete volumes, as the current timeout is 10s (kubernetes documentation says 15s but code defaults are 10s). A virtual setup takes about 12-15s to complete a request at times, that leads to unwanted retries of the same request, hence the increased timeout to enable operation completion with minimal retries. Tests to create PVCs before and after these changes look like so, Before: Default master code + sidecar provisioner --timeout option set to 30 seconds 20 PVCs Creation: 3 runs, 396/391/400 seconds Deletion: 3 runs, 218/271/118 seconds - Once was stalled for more than 8 minutes and cancelled the run After: Current commit + sidecar provisioner --timeout option set to 30 sec 20 PVCs Creation: 3 runs, 42/59/65 seconds Deletion: 3 runs, 32/32/31 seconds Fixes: #279 Signed-off-by: ShyamsundarR <srangana@redhat.com> 2019-06-22 16:43:28 +00:00
Change the logic of locking if any on going opearation is seen,we have to return Abort error message Signed-off-by: Madhu Rajanna <madhupr007@gmail.com> 2019-09-12 04:53:37 +00:00			`if ok {`
			`t.Errorf("TryAcquire failed: want (%v), got (%v)",`
			`false, ok)`
Move locks to more granular locking than CPU count based As detailed in issue #279, current lock scheme has hash buckets that are count of CPUs. This causes a lot of contention when parallel requests are made to the CSI plugin. To reduce lock contention, this commit introduces granular locks per identifier. The commit also changes the timeout for gRPC requests to Create and Delete volumes, as the current timeout is 10s (kubernetes documentation says 15s but code defaults are 10s). A virtual setup takes about 12-15s to complete a request at times, that leads to unwanted retries of the same request, hence the increased timeout to enable operation completion with minimal retries. Tests to create PVCs before and after these changes look like so, Before: Default master code + sidecar provisioner --timeout option set to 30 seconds 20 PVCs Creation: 3 runs, 396/391/400 seconds Deletion: 3 runs, 218/271/118 seconds - Once was stalled for more than 8 minutes and cancelled the run After: Current commit + sidecar provisioner --timeout option set to 30 sec 20 PVCs Creation: 3 runs, 42/59/65 seconds Deletion: 3 runs, 32/32/31 seconds Fixes: #279 Signed-off-by: ShyamsundarR <srangana@redhat.com> 2019-06-22 16:43:28 +00:00			`}`

Change the logic of locking if any on going opearation is seen,we have to return Abort error message Signed-off-by: Madhu Rajanna <madhupr007@gmail.com> 2019-09-12 04:53:37 +00:00			`// release the lock for fake-id and try to get lock again, it should pass`
			`locks.Release(fakeID)`
			`ok = locks.TryAcquire(fakeID)`

			`if !ok {`
			`t.Errorf("TryAcquire failed: want (%v), got (%v)",`
			`true, ok)`
			`}`
Move locks to more granular locking than CPU count based As detailed in issue #279, current lock scheme has hash buckets that are count of CPUs. This causes a lot of contention when parallel requests are made to the CSI plugin. To reduce lock contention, this commit introduces granular locks per identifier. The commit also changes the timeout for gRPC requests to Create and Delete volumes, as the current timeout is 10s (kubernetes documentation says 15s but code defaults are 10s). A virtual setup takes about 12-15s to complete a request at times, that leads to unwanted retries of the same request, hence the increased timeout to enable operation completion with minimal retries. Tests to create PVCs before and after these changes look like so, Before: Default master code + sidecar provisioner --timeout option set to 30 seconds 20 PVCs Creation: 3 runs, 396/391/400 seconds Deletion: 3 runs, 218/271/118 seconds - Once was stalled for more than 8 minutes and cancelled the run After: Current commit + sidecar provisioner --timeout option set to 30 sec 20 PVCs Creation: 3 runs, 42/59/65 seconds Deletion: 3 runs, 32/32/31 seconds Fixes: #279 Signed-off-by: ShyamsundarR <srangana@redhat.com> 2019-06-22 16:43:28 +00:00			`}`
journal: Add additional operation based locking As we are adding new functionalities like Create/Delete snapshot,Clone from Snapshot and Clone from Volume. with the current implementation, there are only serial operations allowed for this functionalities, for some function we can allow parallel operations like Clone from snapshot and Clone from Volume and Create `N` snapshots on a single volume. Delete Volume: Need to ensure that there is no clone, Snapshot create and Expand volume in progress. Expand Volume: Need to ensure that there is no clone, snapshot create and cloning in progress Delete Snapshot: Need to ensure that there is no cloning in progress Restore Volume/Snapshot: Need to ensure that there is no Expand or delete operation in progress. Signed-off-by: Madhu Rajanna <madhupr007@gmail.com> 2020-07-13 02:37:33 +00:00
			`func TestOperationLocks(t *testing.T) {`
cleanup: addresses paralleltest linter The Go linter paralleltest checks that the t.Parallel gets called for the test method and for the range of test cases within the test. Updates: #2025 Signed-off-by: Yati Padia <ypadia@redhat.com> 2021-06-02 09:55:53 +00:00			`t.Parallel()`
journal: Add additional operation based locking As we are adding new functionalities like Create/Delete snapshot,Clone from Snapshot and Clone from Volume. with the current implementation, there are only serial operations allowed for this functionalities, for some function we can allow parallel operations like Clone from snapshot and Clone from Volume and Create `N` snapshots on a single volume. Delete Volume: Need to ensure that there is no clone, Snapshot create and Expand volume in progress. Expand Volume: Need to ensure that there is no clone, snapshot create and cloning in progress Delete Snapshot: Need to ensure that there is no cloning in progress Restore Volume/Snapshot: Need to ensure that there is no Expand or delete operation in progress. Signed-off-by: Madhu Rajanna <madhupr007@gmail.com> 2020-07-13 02:37:33 +00:00			`volumeID := "test-vol"`
			`lock := NewOperationLock()`
			`err := lock.GetCloneLock(volumeID)`
			`if err != nil {`
			`t.Errorf("failed to acquire clone lock for %s %s", volumeID, err)`
			`}`

			`err = lock.GetExpandLock(volumeID)`
			`if err == nil {`
			`t.Errorf("expected to fail for GetExpandLock for %s", volumeID)`
			`}`
			`lock.ReleaseCloneLock(volumeID)`

			`// Get multiple clone operation`
			`err = lock.GetCloneLock(volumeID)`
			`if err != nil {`
			`t.Errorf("failed to acquire clone lock for %s %s", volumeID, err)`
			`}`
			`err = lock.GetCloneLock(volumeID)`
			`if err != nil {`
			`t.Errorf("failed to acquire clone lock for %s %s", volumeID, err)`
			`}`
			`err = lock.GetCloneLock(volumeID)`
			`if err != nil {`
			`t.Errorf("failed to acquire clone lock for %s %s", volumeID, err)`
			`}`
			`// release all clone locks`
			`lock.ReleaseCloneLock(volumeID)`
			`lock.ReleaseCloneLock(volumeID)`
			`lock.ReleaseCloneLock(volumeID)`

			`// release extra lock it should not cause any issue as the key is already`
			`// deleted from the map`
			`lock.ReleaseCloneLock(volumeID)`

			`// get multiple restore lock`
			`err = lock.GetRestoreLock(volumeID)`
			`if err != nil {`
			`t.Errorf("failed to acquire restore lock for %s %s", volumeID, err)`
			`}`
			`err = lock.GetRestoreLock(volumeID)`
			`if err != nil {`
			`t.Errorf("failed to acquire restore lock for %s %s", volumeID, err)`
			`}`
			`err = lock.GetRestoreLock(volumeID)`
			`if err != nil {`
			`t.Errorf("failed to acquire restore lock for %s %s", volumeID, err)`
			`}`
			`// release all restore locks`
			`lock.ReleaseRestoreLock(volumeID)`
			`lock.ReleaseRestoreLock(volumeID)`
			`lock.ReleaseRestoreLock(volumeID)`

			`err = lock.GetSnapshotCreateLock(volumeID)`
			`if err != nil {`
			`t.Errorf("failed to acquire createSnapshot lock for %s %s", volumeID, err)`
			`}`
			`lock.ReleaseSnapshotCreateLock(volumeID)`

			`err = lock.GetDeleteLock(volumeID)`
			`if err != nil {`
			`t.Errorf("failed to get GetDeleteLock for %s %v", volumeID, err)`
			`}`
			`lock.ReleaseDeleteLock(volumeID)`
			`}`