mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-01-18 10:49:30 +00:00
9c8de9471e
update kubernetes and its dependencies to v1.26.1 Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
460 lines
14 KiB
Go
460 lines
14 KiB
Go
//go:build linux
|
|
// +build linux
|
|
|
|
package fsnotify
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"unsafe"
|
|
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// Watcher watches a set of paths, delivering events on a channel.
|
|
//
|
|
// A watcher should not be copied (e.g. pass it by pointer, rather than by
|
|
// value).
|
|
//
|
|
// # Linux notes
|
|
//
|
|
// When a file is removed a Remove event won't be emitted until all file
|
|
// descriptors are closed, and deletes will always emit a Chmod. For example:
|
|
//
|
|
// fp := os.Open("file")
|
|
// os.Remove("file") // Triggers Chmod
|
|
// fp.Close() // Triggers Remove
|
|
//
|
|
// This is the event that inotify sends, so not much can be changed about this.
|
|
//
|
|
// The fs.inotify.max_user_watches sysctl variable specifies the upper limit
|
|
// for the number of watches per user, and fs.inotify.max_user_instances
|
|
// specifies the maximum number of inotify instances per user. Every Watcher you
|
|
// create is an "instance", and every path you add is a "watch".
|
|
//
|
|
// These are also exposed in /proc as /proc/sys/fs/inotify/max_user_watches and
|
|
// /proc/sys/fs/inotify/max_user_instances
|
|
//
|
|
// To increase them you can use sysctl or write the value to the /proc file:
|
|
//
|
|
// # Default values on Linux 5.18
|
|
// sysctl fs.inotify.max_user_watches=124983
|
|
// sysctl fs.inotify.max_user_instances=128
|
|
//
|
|
// To make the changes persist on reboot edit /etc/sysctl.conf or
|
|
// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check
|
|
// your distro's documentation):
|
|
//
|
|
// fs.inotify.max_user_watches=124983
|
|
// fs.inotify.max_user_instances=128
|
|
//
|
|
// Reaching the limit will result in a "no space left on device" or "too many open
|
|
// files" error.
|
|
//
|
|
// # kqueue notes (macOS, BSD)
|
|
//
|
|
// kqueue requires opening a file descriptor for every file that's being watched;
|
|
// so if you're watching a directory with five files then that's six file
|
|
// descriptors. You will run in to your system's "max open files" limit faster on
|
|
// these platforms.
|
|
//
|
|
// The sysctl variables kern.maxfiles and kern.maxfilesperproc can be used to
|
|
// control the maximum number of open files, as well as /etc/login.conf on BSD
|
|
// systems.
|
|
//
|
|
// # macOS notes
|
|
//
|
|
// Spotlight indexing on macOS can result in multiple events (see [#15]). A
|
|
// temporary workaround is to add your folder(s) to the "Spotlight Privacy
|
|
// Settings" until we have a native FSEvents implementation (see [#11]).
|
|
//
|
|
// [#11]: https://github.com/fsnotify/fsnotify/issues/11
|
|
// [#15]: https://github.com/fsnotify/fsnotify/issues/15
|
|
type Watcher struct {
|
|
// Events sends the filesystem change events.
|
|
//
|
|
// fsnotify can send the following events; a "path" here can refer to a
|
|
// file, directory, symbolic link, or special file like a FIFO.
|
|
//
|
|
// fsnotify.Create A new path was created; this may be followed by one
|
|
// or more Write events if data also gets written to a
|
|
// file.
|
|
//
|
|
// fsnotify.Remove A path was removed.
|
|
//
|
|
// fsnotify.Rename A path was renamed. A rename is always sent with the
|
|
// old path as Event.Name, and a Create event will be
|
|
// sent with the new name. Renames are only sent for
|
|
// paths that are currently watched; e.g. moving an
|
|
// unmonitored file into a monitored directory will
|
|
// show up as just a Create. Similarly, renaming a file
|
|
// to outside a monitored directory will show up as
|
|
// only a Rename.
|
|
//
|
|
// fsnotify.Write A file or named pipe was written to. A Truncate will
|
|
// also trigger a Write. A single "write action"
|
|
// initiated by the user may show up as one or multiple
|
|
// writes, depending on when the system syncs things to
|
|
// disk. For example when compiling a large Go program
|
|
// you may get hundreds of Write events, so you
|
|
// probably want to wait until you've stopped receiving
|
|
// them (see the dedup example in cmd/fsnotify).
|
|
//
|
|
// fsnotify.Chmod Attributes were changed. On Linux this is also sent
|
|
// when a file is removed (or more accurately, when a
|
|
// link to an inode is removed). On kqueue it's sent
|
|
// and on kqueue when a file is truncated. On Windows
|
|
// it's never sent.
|
|
Events chan Event
|
|
|
|
// Errors sends any errors.
|
|
Errors chan error
|
|
|
|
// Store fd here as os.File.Read() will no longer return on close after
|
|
// calling Fd(). See: https://github.com/golang/go/issues/26439
|
|
fd int
|
|
mu sync.Mutex // Map access
|
|
inotifyFile *os.File
|
|
watches map[string]*watch // Map of inotify watches (key: path)
|
|
paths map[int]string // Map of watched paths (key: watch descriptor)
|
|
done chan struct{} // Channel for sending a "quit message" to the reader goroutine
|
|
doneResp chan struct{} // Channel to respond to Close
|
|
}
|
|
|
|
// NewWatcher creates a new Watcher.
|
|
func NewWatcher() (*Watcher, error) {
|
|
// Create inotify fd
|
|
// Need to set the FD to nonblocking mode in order for SetDeadline methods to work
|
|
// Otherwise, blocking i/o operations won't terminate on close
|
|
fd, errno := unix.InotifyInit1(unix.IN_CLOEXEC | unix.IN_NONBLOCK)
|
|
if fd == -1 {
|
|
return nil, errno
|
|
}
|
|
|
|
w := &Watcher{
|
|
fd: fd,
|
|
inotifyFile: os.NewFile(uintptr(fd), ""),
|
|
watches: make(map[string]*watch),
|
|
paths: make(map[int]string),
|
|
Events: make(chan Event),
|
|
Errors: make(chan error),
|
|
done: make(chan struct{}),
|
|
doneResp: make(chan struct{}),
|
|
}
|
|
|
|
go w.readEvents()
|
|
return w, nil
|
|
}
|
|
|
|
// Returns true if the event was sent, or false if watcher is closed.
|
|
func (w *Watcher) sendEvent(e Event) bool {
|
|
select {
|
|
case w.Events <- e:
|
|
return true
|
|
case <-w.done:
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Returns true if the error was sent, or false if watcher is closed.
|
|
func (w *Watcher) sendError(err error) bool {
|
|
select {
|
|
case w.Errors <- err:
|
|
return true
|
|
case <-w.done:
|
|
return false
|
|
}
|
|
}
|
|
|
|
func (w *Watcher) isClosed() bool {
|
|
select {
|
|
case <-w.done:
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
// Close removes all watches and closes the events channel.
|
|
func (w *Watcher) Close() error {
|
|
w.mu.Lock()
|
|
if w.isClosed() {
|
|
w.mu.Unlock()
|
|
return nil
|
|
}
|
|
|
|
// Send 'close' signal to goroutine, and set the Watcher to closed.
|
|
close(w.done)
|
|
w.mu.Unlock()
|
|
|
|
// Causes any blocking reads to return with an error, provided the file
|
|
// still supports deadline operations.
|
|
err := w.inotifyFile.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Wait for goroutine to close
|
|
<-w.doneResp
|
|
|
|
return nil
|
|
}
|
|
|
|
// Add starts monitoring the path for changes.
|
|
//
|
|
// A path can only be watched once; attempting to watch it more than once will
|
|
// return an error. Paths that do not yet exist on the filesystem cannot be
|
|
// added. A watch will be automatically removed if the path is deleted.
|
|
//
|
|
// A path will remain watched if it gets renamed to somewhere else on the same
|
|
// filesystem, but the monitor will get removed if the path gets deleted and
|
|
// re-created, or if it's moved to a different filesystem.
|
|
//
|
|
// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special
|
|
// filesystems (/proc, /sys, etc.) generally don't work.
|
|
//
|
|
// # Watching directories
|
|
//
|
|
// All files in a directory are monitored, including new files that are created
|
|
// after the watcher is started. Subdirectories are not watched (i.e. it's
|
|
// non-recursive).
|
|
//
|
|
// # Watching files
|
|
//
|
|
// Watching individual files (rather than directories) is generally not
|
|
// recommended as many tools update files atomically. Instead of "just" writing
|
|
// to the file a temporary file will be written to first, and if successful the
|
|
// temporary file is moved to to destination removing the original, or some
|
|
// variant thereof. The watcher on the original file is now lost, as it no
|
|
// longer exists.
|
|
//
|
|
// Instead, watch the parent directory and use Event.Name to filter out files
|
|
// you're not interested in. There is an example of this in [cmd/fsnotify/file.go].
|
|
func (w *Watcher) Add(name string) error {
|
|
name = filepath.Clean(name)
|
|
if w.isClosed() {
|
|
return errors.New("inotify instance already closed")
|
|
}
|
|
|
|
var flags uint32 = unix.IN_MOVED_TO | unix.IN_MOVED_FROM |
|
|
unix.IN_CREATE | unix.IN_ATTRIB | unix.IN_MODIFY |
|
|
unix.IN_MOVE_SELF | unix.IN_DELETE | unix.IN_DELETE_SELF
|
|
|
|
w.mu.Lock()
|
|
defer w.mu.Unlock()
|
|
watchEntry := w.watches[name]
|
|
if watchEntry != nil {
|
|
flags |= watchEntry.flags | unix.IN_MASK_ADD
|
|
}
|
|
wd, errno := unix.InotifyAddWatch(w.fd, name, flags)
|
|
if wd == -1 {
|
|
return errno
|
|
}
|
|
|
|
if watchEntry == nil {
|
|
w.watches[name] = &watch{wd: uint32(wd), flags: flags}
|
|
w.paths[wd] = name
|
|
} else {
|
|
watchEntry.wd = uint32(wd)
|
|
watchEntry.flags = flags
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Remove stops monitoring the path for changes.
|
|
//
|
|
// Directories are always removed non-recursively. For example, if you added
|
|
// /tmp/dir and /tmp/dir/subdir then you will need to remove both.
|
|
//
|
|
// Removing a path that has not yet been added returns [ErrNonExistentWatch].
|
|
func (w *Watcher) Remove(name string) error {
|
|
name = filepath.Clean(name)
|
|
|
|
// Fetch the watch.
|
|
w.mu.Lock()
|
|
defer w.mu.Unlock()
|
|
watch, ok := w.watches[name]
|
|
|
|
// Remove it from inotify.
|
|
if !ok {
|
|
return fmt.Errorf("%w: %s", ErrNonExistentWatch, name)
|
|
}
|
|
|
|
// We successfully removed the watch if InotifyRmWatch doesn't return an
|
|
// error, we need to clean up our internal state to ensure it matches
|
|
// inotify's kernel state.
|
|
delete(w.paths, int(watch.wd))
|
|
delete(w.watches, name)
|
|
|
|
// inotify_rm_watch will return EINVAL if the file has been deleted;
|
|
// the inotify will already have been removed.
|
|
// watches and pathes are deleted in ignoreLinux() implicitly and asynchronously
|
|
// by calling inotify_rm_watch() below. e.g. readEvents() goroutine receives IN_IGNORE
|
|
// so that EINVAL means that the wd is being rm_watch()ed or its file removed
|
|
// by another thread and we have not received IN_IGNORE event.
|
|
success, errno := unix.InotifyRmWatch(w.fd, watch.wd)
|
|
if success == -1 {
|
|
// TODO: Perhaps it's not helpful to return an error here in every case;
|
|
// The only two possible errors are:
|
|
//
|
|
// - EBADF, which happens when w.fd is not a valid file descriptor
|
|
// of any kind.
|
|
// - EINVAL, which is when fd is not an inotify descriptor or wd
|
|
// is not a valid watch descriptor. Watch descriptors are
|
|
// invalidated when they are removed explicitly or implicitly;
|
|
// explicitly by inotify_rm_watch, implicitly when the file they
|
|
// are watching is deleted.
|
|
return errno
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// WatchList returns all paths added with [Add] (and are not yet removed).
|
|
func (w *Watcher) WatchList() []string {
|
|
w.mu.Lock()
|
|
defer w.mu.Unlock()
|
|
|
|
entries := make([]string, 0, len(w.watches))
|
|
for pathname := range w.watches {
|
|
entries = append(entries, pathname)
|
|
}
|
|
|
|
return entries
|
|
}
|
|
|
|
type watch struct {
|
|
wd uint32 // Watch descriptor (as returned by the inotify_add_watch() syscall)
|
|
flags uint32 // inotify flags of this watch (see inotify(7) for the list of valid flags)
|
|
}
|
|
|
|
// readEvents reads from the inotify file descriptor, converts the
|
|
// received events into Event objects and sends them via the Events channel
|
|
func (w *Watcher) readEvents() {
|
|
defer func() {
|
|
close(w.doneResp)
|
|
close(w.Errors)
|
|
close(w.Events)
|
|
}()
|
|
|
|
var (
|
|
buf [unix.SizeofInotifyEvent * 4096]byte // Buffer for a maximum of 4096 raw events
|
|
errno error // Syscall errno
|
|
)
|
|
for {
|
|
// See if we have been closed.
|
|
if w.isClosed() {
|
|
return
|
|
}
|
|
|
|
n, err := w.inotifyFile.Read(buf[:])
|
|
switch {
|
|
case errors.Unwrap(err) == os.ErrClosed:
|
|
return
|
|
case err != nil:
|
|
if !w.sendError(err) {
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
|
|
if n < unix.SizeofInotifyEvent {
|
|
var err error
|
|
if n == 0 {
|
|
// If EOF is received. This should really never happen.
|
|
err = io.EOF
|
|
} else if n < 0 {
|
|
// If an error occurred while reading.
|
|
err = errno
|
|
} else {
|
|
// Read was too short.
|
|
err = errors.New("notify: short read in readEvents()")
|
|
}
|
|
if !w.sendError(err) {
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
|
|
var offset uint32
|
|
// We don't know how many events we just read into the buffer
|
|
// While the offset points to at least one whole event...
|
|
for offset <= uint32(n-unix.SizeofInotifyEvent) {
|
|
var (
|
|
// Point "raw" to the event in the buffer
|
|
raw = (*unix.InotifyEvent)(unsafe.Pointer(&buf[offset]))
|
|
mask = uint32(raw.Mask)
|
|
nameLen = uint32(raw.Len)
|
|
)
|
|
|
|
if mask&unix.IN_Q_OVERFLOW != 0 {
|
|
if !w.sendError(ErrEventOverflow) {
|
|
return
|
|
}
|
|
}
|
|
|
|
// If the event happened to the watched directory or the watched file, the kernel
|
|
// doesn't append the filename to the event, but we would like to always fill the
|
|
// the "Name" field with a valid filename. We retrieve the path of the watch from
|
|
// the "paths" map.
|
|
w.mu.Lock()
|
|
name, ok := w.paths[int(raw.Wd)]
|
|
// IN_DELETE_SELF occurs when the file/directory being watched is removed.
|
|
// This is a sign to clean up the maps, otherwise we are no longer in sync
|
|
// with the inotify kernel state which has already deleted the watch
|
|
// automatically.
|
|
if ok && mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF {
|
|
delete(w.paths, int(raw.Wd))
|
|
delete(w.watches, name)
|
|
}
|
|
w.mu.Unlock()
|
|
|
|
if nameLen > 0 {
|
|
// Point "bytes" at the first byte of the filename
|
|
bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent]))[:nameLen:nameLen]
|
|
// The filename is padded with NULL bytes. TrimRight() gets rid of those.
|
|
name += "/" + strings.TrimRight(string(bytes[0:nameLen]), "\000")
|
|
}
|
|
|
|
event := w.newEvent(name, mask)
|
|
|
|
// Send the events that are not ignored on the events channel
|
|
if mask&unix.IN_IGNORED == 0 {
|
|
if !w.sendEvent(event) {
|
|
return
|
|
}
|
|
}
|
|
|
|
// Move to the next event in the buffer
|
|
offset += unix.SizeofInotifyEvent + nameLen
|
|
}
|
|
}
|
|
}
|
|
|
|
// newEvent returns an platform-independent Event based on an inotify mask.
|
|
func (w *Watcher) newEvent(name string, mask uint32) Event {
|
|
e := Event{Name: name}
|
|
if mask&unix.IN_CREATE == unix.IN_CREATE || mask&unix.IN_MOVED_TO == unix.IN_MOVED_TO {
|
|
e.Op |= Create
|
|
}
|
|
if mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF || mask&unix.IN_DELETE == unix.IN_DELETE {
|
|
e.Op |= Remove
|
|
}
|
|
if mask&unix.IN_MODIFY == unix.IN_MODIFY {
|
|
e.Op |= Write
|
|
}
|
|
if mask&unix.IN_MOVE_SELF == unix.IN_MOVE_SELF || mask&unix.IN_MOVED_FROM == unix.IN_MOVED_FROM {
|
|
e.Op |= Rename
|
|
}
|
|
if mask&unix.IN_ATTRIB == unix.IN_ATTRIB {
|
|
e.Op |= Chmod
|
|
}
|
|
return e
|
|
}
|