diff --git a/.github/workflows/auto-assign.yaml b/.github/workflows/auto-assign.yaml
new file mode 100644
index 000000000..fce7665e5
--- /dev/null
+++ b/.github/workflows/auto-assign.yaml
@@ -0,0 +1,20 @@
+---
+name: Assign issue to contributor
+# yamllint disable-line rule:truthy
+on:
+ issue_comment:
+ types: [created, edited]
+
+jobs:
+ assign:
+ name: Run self assign job
+ runs-on: ubuntu-latest
+ steps:
+ - name: take the issue
+ uses: bdougie/take-action@main
+ with:
+ message: >
+ Thanks for taking this issue!
+ Let us know if you have any questions!
+ trigger: /assign
+ token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/charts/ceph-csi-cephfs/README.md b/charts/ceph-csi-cephfs/README.md
index 2826d0536..2bfe4df53 100644
--- a/charts/ceph-csi-cephfs/README.md
+++ b/charts/ceph-csi-cephfs/README.md
@@ -164,6 +164,8 @@ charts and their default values.
| `provisioner.podSecurityPolicy.enabled` | Specifies whether podSecurityPolicy is enabled | `false` |
| `provisionerSocketFile` | The filename of the provisioner socket | `csi-provisioner.sock` |
| `pluginSocketFile` | The filename of the plugin socket | `csi.sock` |
+| `readAffinity.enabled` | Enable read affinity for CephFS subvolumes. Recommended to set to true if running kernel 5.8 or newer. | `false` |
+| `readAffinity.crushLocationLabels` | Define which node labels to use as CRUSH location. This should correspond to the values set in the CRUSH map. For more information, click [here](https://github.com/ceph/ceph-csi/blob/v3.9.0/docs/deploy-cephfs.md#read-affinity-using-crush-locations-for-cephfs-subvolumes)| `[]` |
| `kubeletDir` | Kubelet working directory | `/var/lib/kubelet` |
| `driverName` | Name of the csi-driver | `cephfs.csi.ceph.com` |
| `configMapName` | Name of the configmap which contains cluster configuration | `ceph-csi-config` |
diff --git a/charts/ceph-csi-cephfs/templates/nodeplugin-clusterrole.yaml b/charts/ceph-csi-cephfs/templates/nodeplugin-clusterrole.yaml
new file mode 100644
index 000000000..e425f1840
--- /dev/null
+++ b/charts/ceph-csi-cephfs/templates/nodeplugin-clusterrole.yaml
@@ -0,0 +1,19 @@
+{{- if .Values.rbac.create -}}
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+ name: {{ include "ceph-csi-cephfs.nodeplugin.fullname" . }}
+ labels:
+ app: {{ include "ceph-csi-cephfs.name" . }}
+ chart: {{ include "ceph-csi-cephfs.chart" . }}
+ component: {{ .Values.nodeplugin.name }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ {{- with .Values.commonLabels }}{{ toYaml . | trim | nindent 4 }}{{- end }}
+rules:
+{{- if and .Values.readAffinity .Values.readAffinity.enabled }}
+ - apiGroups: [""]
+ resources: ["nodes"]
+ verbs: ["get"]
+{{- end }}
+{{- end -}}
diff --git a/charts/ceph-csi-cephfs/templates/nodeplugin-clusterrolebinding.yaml b/charts/ceph-csi-cephfs/templates/nodeplugin-clusterrolebinding.yaml
new file mode 100644
index 000000000..fd8d5787a
--- /dev/null
+++ b/charts/ceph-csi-cephfs/templates/nodeplugin-clusterrolebinding.yaml
@@ -0,0 +1,21 @@
+{{- if .Values.rbac.create -}}
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+ name: {{ include "ceph-csi-cephfs.nodeplugin.fullname" . }}
+ labels:
+ app: {{ include "ceph-csi-cephfs.name" . }}
+ chart: {{ include "ceph-csi-cephfs.chart" . }}
+ component: {{ .Values.nodeplugin.name }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ {{- with .Values.commonLabels }}{{ toYaml . | trim | nindent 4 }}{{- end }}
+subjects:
+ - kind: ServiceAccount
+ name: {{ include "ceph-csi-cephfs.serviceAccountName.nodeplugin" . }}
+ namespace: {{ .Release.Namespace }}
+roleRef:
+ kind: ClusterRole
+ name: {{ include "ceph-csi-cephfs.nodeplugin.fullname" . }}
+ apiGroup: rbac.authorization.k8s.io
+{{- end -}}
diff --git a/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml b/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml
index d3a5b9df8..264e50ea9 100644
--- a/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml
+++ b/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml
@@ -85,6 +85,10 @@ spec:
- "--drivername=$(DRIVER_NAME)"
{{- if .Values.nodeplugin.profiling.enabled }}
- "--enableprofiling={{ .Values.nodeplugin.profiling.enabled }}"
+{{- end }}
+ - "--enable-read-affinity={{ and .Values.readAffinity .Values.readAffinity.enabled }}"
+{{- if and .Values.readAffinity .Values.readAffinity.enabled }}
+ - "--crush-location-labels={{ .Values.readAffinity.crushLocationLabels | join "," }}"
{{- end }}
env:
- name: POD_IP
diff --git a/charts/ceph-csi-cephfs/values.yaml b/charts/ceph-csi-cephfs/values.yaml
index 552f58bb0..96336e4d4 100644
--- a/charts/ceph-csi-cephfs/values.yaml
+++ b/charts/ceph-csi-cephfs/values.yaml
@@ -222,6 +222,17 @@ provisioner:
affinity: {}
+# readAffinity:
+# Enable read affinity for CephFS subvolumes. Recommended to
+# set to true if running kernel 5.8 or newer.
+# enabled: false
+# Define which node labels to use as CRUSH location.
+# This should correspond to the values set in the CRUSH map.
+# NOTE: the value here serves as an example
+# crushLocationLabels:
+# - topology.kubernetes.io/region
+# - topology.kubernetes.io/zone
+
# Mount the host /etc/selinux inside pods to support
# selinux-enabled filesystems
selinuxMount: true
diff --git a/deploy/cephcsi/image/Dockerfile b/deploy/cephcsi/image/Dockerfile
index 1baaa7c4f..b829e9060 100644
--- a/deploy/cephcsi/image/Dockerfile
+++ b/deploy/cephcsi/image/Dockerfile
@@ -37,7 +37,7 @@ RUN source /build.env && \
RUN ${GOROOT}/bin/go version && ${GOROOT}/bin/go env
RUN dnf -y install --nodocs \
- librados-devel librbd-devel \
+ librados-devel librbd-devel libcephfs-devel \
/usr/bin/cc \
make \
git \
diff --git a/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml b/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml
index 8aa526ba4..a38ace99f 100644
--- a/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml
+++ b/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml
@@ -63,6 +63,15 @@ spec:
# and pass the label names below, for CSI to consume and advertise
# its equivalent topology domain
# - "--domainlabels=failure-domain/region,failure-domain/zone"
+ #
+ # Options to enable read affinity.
+ # If enabled Ceph CSI will fetch labels from kubernetes node and
+ # pass `read_from_replica=localize,crush_location=type:value` during
+ # CephFS mount command. refer:
+ # https://docs.ceph.com/en/latest/man/8/rbd/#kernel-rbd-krbd-options
+ # for more details.
+ # - "--enable-read-affinity=true"
+ # - "--crush-location-labels=topology.io/zone,topology.io/rack"
env:
- name: POD_IP
valueFrom:
diff --git a/deploy/cephfs/kubernetes/csi-nodeplugin-rbac.yaml b/deploy/cephfs/kubernetes/csi-nodeplugin-rbac.yaml
index c1833d044..a7d4b6bc6 100644
--- a/deploy/cephfs/kubernetes/csi-nodeplugin-rbac.yaml
+++ b/deploy/cephfs/kubernetes/csi-nodeplugin-rbac.yaml
@@ -10,6 +10,9 @@ apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: cephfs-csi-nodeplugin
rules:
+ - apiGroups: [""]
+ resources: ["nodes"]
+ verbs: ["get"]
- apiGroups: [""]
resources: ["secrets"]
verbs: ["get"]
diff --git a/docs/deploy-cephfs.md b/docs/deploy-cephfs.md
index 48043877a..ddb3f40f3 100644
--- a/docs/deploy-cephfs.md
+++ b/docs/deploy-cephfs.md
@@ -47,6 +47,8 @@ make image-cephcsi
| `--kernelmountoptions` | _empty_ | Comma separated string of mount options accepted by cephfs kernel mounter.
`Note: These options will be replaced if kernelMountOptions are defined in the ceph-csi-config ConfigMap for the specific cluster.` |
| `--fusemountoptions` | _empty_ | Comma separated string of mount options accepted by ceph-fuse mounter.
`Note: These options will be replaced if fuseMountOptions are defined in the ceph-csi-config ConfigMap for the specific cluster.` |
| `--domainlabels` | _empty_ | Kubernetes node labels to use as CSI domain labels for topology aware provisioning, should be a comma separated value (ex:= "failure-domain/region,failure-domain/zone") |
+| `--enable-read-affinity` | `false` | enable read affinity |
+| `--crush-location-labels`| _empty_ | Kubernetes node labels that determine the CRUSH location the node belongs to, separated by ','.
`Note: These labels will be replaced if crush location labels are defined in the ceph-csi-config ConfigMap for the specific cluster.` |
**NOTE:** The parameter `-forcecephkernelclient` enables the Kernel
CephFS mounter on kernels < 4.17.
@@ -223,6 +225,27 @@ The Helm chart is located in `charts/ceph-csi-cephfs`.
[See the Helm chart readme for installation instructions.](../charts/ceph-csi-cephfs/README.md)
+## Read Affinity using crush locations for CephFS subvolumes
+
+Ceph CSI supports mounting CephFS subvolumes with kernel mount options
+`"read_from_replica=localize,crush_location=type1:value1|type2:value2"` to
+allow serving reads from the most local OSD (according to OSD locations as
+defined in the CRUSH map).
+
+This can be enabled by adding labels to Kubernetes nodes like
+`"topology.io/region=east"` and `"topology.io/zone=east-zone1"` and
+passing command line arguments `"--enable-read-affinity=true"` and
+`"--crush-location-labels=topology.io/zone,topology.io/region"` to Ceph CSI
+CephFS daemonset pod "csi-cephfsplugin" container, resulting in Ceph CSI adding
+`"--options read_from_replica=localize,crush_location=zone:east-zone1|region:east"`
+kernel mount options during cephfs mount operation.
+If enabled, this option will be added to all CephFS subvolumes mapped by Ceph CSI.
+Well known labels can be found
+[here](https://kubernetes.io/docs/reference/labels-annotations-taints/).
+
+>Note: Label values will have all its dots `"."` normalized with dashes `"-"`
+in order for it to work with ceph CRUSH map.
+
## CephFS Volume Encryption
Requires fscrypt support in the Linux kernel and Ceph.
diff --git a/docs/development-guide.md b/docs/development-guide.md
index da57a3e15..e9eca403c 100644
--- a/docs/development-guide.md
+++ b/docs/development-guide.md
@@ -24,8 +24,8 @@ it is **highly** encouraged to:
* Ceph-CSI uses the native Ceph libraries through the [go-ceph
package](https://github.com/ceph/go-ceph). It is required to install the
Ceph C headers in order to compile Ceph-CSI. The packages are called
- `librados-devel` and `librbd-devel` on many Linux distributions. See the
- [go-ceph installation
+ `librados-devel` , `librbd-devel` and `libcephfs-devel`
+ on many Linux distributions. See the [go-ceph installation
instructions](https://github.com/ceph/go-ceph#installation) for more
details.
* Run
diff --git a/go.mod b/go.mod
index 065485c9a..31b8f43d5 100644
--- a/go.mod
+++ b/go.mod
@@ -7,7 +7,7 @@ require (
github.com/aws/aws-sdk-go v1.48.0
github.com/aws/aws-sdk-go-v2/service/sts v1.25.3
github.com/ceph/ceph-csi/api v0.0.0-00010101000000-000000000000
- github.com/ceph/go-ceph v0.24.0
+ github.com/ceph/go-ceph v0.24.1-0.20231116190858-df112a417d31
github.com/container-storage-interface/spec v1.9.0
github.com/csi-addons/spec v0.2.1-0.20230606140122-d20966d2e444
github.com/gemalto/kmip-go v0.0.10
@@ -73,7 +73,7 @@ require (
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/gemalto/flume v0.13.0 // indirect
github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 // indirect
- github.com/go-jose/go-jose/v3 v3.0.0 // indirect
+ github.com/go-jose/go-jose/v3 v3.0.1 // indirect
github.com/go-logr/logr v1.3.0 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
diff --git a/go.sum b/go.sum
index 7ba134051..bf79a6ffc 100644
--- a/go.sum
+++ b/go.sum
@@ -699,8 +699,8 @@ github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw=
-github.com/ceph/go-ceph v0.24.0 h1:ab1pQCTiNrwjJJJ3bebwQM9tjDQ4tXGKfXAZBNdFiYI=
-github.com/ceph/go-ceph v0.24.0/go.mod h1:gdL5+ewDeHcbV4ZsfD3EH3na35trT07YaTVD1hhJWEg=
+github.com/ceph/go-ceph v0.24.1-0.20231116190858-df112a417d31 h1:fE2zYRU9FzR+B1PZsBXFxwHwF11sTA6EXGz5UzNcc3c=
+github.com/ceph/go-ceph v0.24.1-0.20231116190858-df112a417d31/go.mod h1:9CbXz5yKYVVw71nx3w9kh+odfkPrXxGCxVniH1QVv38=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
@@ -814,8 +814,9 @@ github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmn
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
-github.com/go-jose/go-jose/v3 v3.0.0 h1:s6rrhirfEP/CGIoc6p+PZAeogN2SxKav6Wp7+dyMWVo=
github.com/go-jose/go-jose/v3 v3.0.0/go.mod h1:RNkWWRld676jZEYoV3+XK8L2ZnNSvIsxFMht0mSX+u8=
+github.com/go-jose/go-jose/v3 v3.0.1 h1:pWmKFVtt+Jl0vBZTIpz/eAKwsm6LkIxDVVbFHKkchhA=
+github.com/go-jose/go-jose/v3 v3.0.1/go.mod h1:RNkWWRld676jZEYoV3+XK8L2ZnNSvIsxFMht0mSX+u8=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=
diff --git a/internal/cephfs/core/metadata.go b/internal/cephfs/core/metadata.go
index 9e2b90d5f..956707e4c 100644
--- a/internal/cephfs/core/metadata.go
+++ b/internal/cephfs/core/metadata.go
@@ -19,8 +19,8 @@ package core
import (
"errors"
"fmt"
- "strings"
+ libcephfs "github.com/ceph/go-ceph/cephfs"
fsAdmin "github.com/ceph/go-ceph/cephfs/admin"
)
@@ -133,8 +133,7 @@ func (s *subVolumeClient) UnsetAllMetadata(keys []string) error {
if errors.Is(err, ErrSubVolMetadataNotSupported) {
return nil
}
- // TODO: replace string comparison with errno.
- if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
+ if err != nil && !errors.Is(err, libcephfs.ErrNotExist) {
return fmt.Errorf("failed to unset metadata key %q on subvolume %v: %w", key, s, err)
}
}
@@ -144,8 +143,7 @@ func (s *subVolumeClient) UnsetAllMetadata(keys []string) error {
if errors.Is(err, ErrSubVolMetadataNotSupported) {
return nil
}
- // TODO: replace string comparison with errno.
- if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
+ if err != nil && !errors.Is(err, libcephfs.ErrNotExist) {
return fmt.Errorf("failed to unset metadata key %q on subvolume %v: %w", clusterNameKey, s, err)
}
diff --git a/internal/cephfs/core/snapshot_metadata.go b/internal/cephfs/core/snapshot_metadata.go
index f168fbf8c..b5cb5457b 100644
--- a/internal/cephfs/core/snapshot_metadata.go
+++ b/internal/cephfs/core/snapshot_metadata.go
@@ -19,8 +19,8 @@ package core
import (
"errors"
"fmt"
- "strings"
+ libcephfs "github.com/ceph/go-ceph/cephfs"
fsAdmin "github.com/ceph/go-ceph/cephfs/admin"
)
@@ -121,16 +121,14 @@ func (s *snapshotClient) UnsetAllSnapshotMetadata(keys []string) error {
for _, key := range keys {
err := s.removeSnapshotMetadata(key)
- // TODO: replace string comparison with errno.
- if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
+ if err != nil && !errors.Is(err, libcephfs.ErrNotExist) {
return fmt.Errorf("failed to unset metadata key %q on subvolume snapshot %s %s in fs %s: %w",
key, s.SnapshotID, s.VolID, s.FsName, err)
}
}
err := s.removeSnapshotMetadata(clusterNameKey)
- // TODO: replace string comparison with errno.
- if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
+ if err != nil && !errors.Is(err, libcephfs.ErrNotExist) {
return fmt.Errorf("failed to unset metadata key %q on subvolume snapshot %s %s in fs %s: %w",
clusterNameKey, s.SnapshotID, s.VolID, s.FsName, err)
}
diff --git a/internal/cephfs/driver.go b/internal/cephfs/driver.go
index 951b71456..9f1957fd6 100644
--- a/internal/cephfs/driver.go
+++ b/internal/cephfs/driver.go
@@ -28,6 +28,7 @@ import (
hc "github.com/ceph/ceph-csi/internal/health-checker"
"github.com/ceph/ceph-csi/internal/journal"
"github.com/ceph/ceph-csi/internal/util"
+ "github.com/ceph/ceph-csi/internal/util/k8s"
"github.com/ceph/ceph-csi/internal/util/log"
"github.com/container-storage-interface/spec/lib/go/csi"
@@ -74,24 +75,29 @@ func NewControllerServer(d *csicommon.CSIDriver) *ControllerServer {
func NewNodeServer(
d *csicommon.CSIDriver,
t string,
- topology map[string]string,
kernelMountOptions string,
fuseMountOptions string,
+ nodeLabels, topology, crushLocationMap map[string]string,
) *NodeServer {
- return &NodeServer{
- DefaultNodeServer: csicommon.NewDefaultNodeServer(d, t, topology),
+ cliReadAffinityMapOptions := util.ConstructReadAffinityMapOption(crushLocationMap)
+ ns := &NodeServer{
+ DefaultNodeServer: csicommon.NewDefaultNodeServer(d, t, cliReadAffinityMapOptions, topology, nodeLabels),
VolumeLocks: util.NewVolumeLocks(),
kernelMountOptions: kernelMountOptions,
fuseMountOptions: fuseMountOptions,
healthChecker: hc.NewHealthCheckManager(),
}
+
+ return ns
}
// Run start a non-blocking grpc controller,node and identityserver for
// ceph CSI driver which can serve multiple parallel requests.
func (fs *Driver) Run(conf *util.Config) {
- var err error
- var topology map[string]string
+ var (
+ err error
+ nodeLabels, topology, crushLocationMap map[string]string
+ )
// Configuration
if err = mounter.LoadAvailableMounters(conf); err != nil {
@@ -102,6 +108,18 @@ func (fs *Driver) Run(conf *util.Config) {
if conf.InstanceID != "" {
CSIInstanceID = conf.InstanceID
}
+
+ if conf.IsNodeServer && k8s.RunsOnKubernetes() {
+ nodeLabels, err = k8s.GetNodeLabels(conf.NodeID)
+ if err != nil {
+ log.FatalLogMsg(err.Error())
+ }
+ }
+
+ if conf.EnableReadAffinity {
+ crushLocationMap = util.GetCrushLocationMap(conf.CrushLocationLabels, nodeLabels)
+ }
+
// Create an instance of the volume journal
store.VolJournal = journal.NewCSIVolumeJournalWithNamespace(CSIInstanceID, fsutil.RadosNamespace)
@@ -138,7 +156,11 @@ func (fs *Driver) Run(conf *util.Config) {
if err != nil {
log.FatalLogMsg(err.Error())
}
- fs.ns = NewNodeServer(fs.cd, conf.Vtype, topology, conf.KernelMountOptions, conf.FuseMountOptions)
+ fs.ns = NewNodeServer(
+ fs.cd, conf.Vtype,
+ conf.KernelMountOptions, conf.FuseMountOptions,
+ nodeLabels, topology, crushLocationMap,
+ )
}
if conf.IsControllerServer {
@@ -151,7 +173,11 @@ func (fs *Driver) Run(conf *util.Config) {
if err != nil {
log.FatalLogMsg(err.Error())
}
- fs.ns = NewNodeServer(fs.cd, conf.Vtype, topology, conf.KernelMountOptions, conf.FuseMountOptions)
+ fs.ns = NewNodeServer(
+ fs.cd, conf.Vtype,
+ conf.KernelMountOptions, conf.FuseMountOptions,
+ nodeLabels, topology, crushLocationMap,
+ )
fs.cs = NewControllerServer(fs.cd)
}
diff --git a/internal/cephfs/nodeserver.go b/internal/cephfs/nodeserver.go
index 5d353cc66..58b2e3deb 100644
--- a/internal/cephfs/nodeserver.go
+++ b/internal/cephfs/nodeserver.go
@@ -766,11 +766,12 @@ func (ns *NodeServer) setMountOptions(
csiConfigFile string,
) error {
var (
- configuredMountOptions string
- kernelMountOptions string
- fuseMountOptions string
- mountOptions []string
- err error
+ configuredMountOptions string
+ readAffinityMountOptions string
+ kernelMountOptions string
+ fuseMountOptions string
+ mountOptions []string
+ err error
)
if m := volCap.GetMount(); m != nil {
mountOptions = m.GetMountFlags()
@@ -781,6 +782,14 @@ func (ns *NodeServer) setMountOptions(
if err != nil {
return err
}
+
+ // read affinity mount options
+ readAffinityMountOptions, err = util.GetReadAffinityMapOptions(
+ csiConfigFile, volOptions.ClusterID, ns.CLIReadAffinityOptions, ns.NodeLabels,
+ )
+ if err != nil {
+ return err
+ }
}
switch mnt.(type) {
@@ -799,6 +808,7 @@ func (ns *NodeServer) setMountOptions(
configuredMountOptions = kernelMountOptions
}
volOptions.KernelMountOptions = util.MountOptionsAdd(volOptions.KernelMountOptions, configuredMountOptions)
+ volOptions.KernelMountOptions = util.MountOptionsAdd(volOptions.KernelMountOptions, readAffinityMountOptions)
volOptions.KernelMountOptions = util.MountOptionsAdd(volOptions.KernelMountOptions, mountOptions...)
}
diff --git a/internal/cephfs/nodeserver_test.go b/internal/cephfs/nodeserver_test.go
index e610d95a6..34095529e 100644
--- a/internal/cephfs/nodeserver_test.go
+++ b/internal/cephfs/nodeserver_test.go
@@ -26,6 +26,7 @@ import (
"github.com/ceph/ceph-csi/internal/cephfs/mounter"
"github.com/ceph/ceph-csi/internal/cephfs/store"
+ csicommon "github.com/ceph/ceph-csi/internal/csi-common"
"github.com/ceph/ceph-csi/internal/util"
)
@@ -63,19 +64,19 @@ func Test_setMountOptions(t *testing.T) {
t.Logf("path = %s", tmpConfPath)
err = os.WriteFile(tmpConfPath, csiConfigFileContent, 0o600)
if err != nil {
- t.Errorf("failed to write %s file content: %v", util.CsiConfigFile, err)
+ t.Errorf("failed to write %s file content: %v", tmpConfPath, err)
}
tests := []struct {
name string
- ns NodeServer
+ ns *NodeServer
mnt mounter.VolumeMounter
volOptions *store.VolumeOptions
want string
}{
{
name: "KernelMountOptions set in cluster-1 config and not set in CLI",
- ns: NodeServer{},
+ ns: &NodeServer{},
mnt: mounter.VolumeMounter(&mounter.KernelMounter{}),
volOptions: &store.VolumeOptions{
ClusterID: "cluster-1",
@@ -84,7 +85,7 @@ func Test_setMountOptions(t *testing.T) {
},
{
name: "FuseMountOptions set in cluster-1 config and not set in CLI",
- ns: NodeServer{},
+ ns: &NodeServer{},
mnt: mounter.VolumeMounter(&mounter.FuseMounter{}),
volOptions: &store.VolumeOptions{
ClusterID: "cluster-1",
@@ -93,7 +94,7 @@ func Test_setMountOptions(t *testing.T) {
},
{
name: "KernelMountOptions set in cluster-1 config and set in CLI",
- ns: NodeServer{
+ ns: &NodeServer{
kernelMountOptions: cliKernelMountOptions,
},
mnt: mounter.VolumeMounter(&mounter.KernelMounter{}),
@@ -104,7 +105,7 @@ func Test_setMountOptions(t *testing.T) {
},
{
name: "FuseMountOptions not set in cluster-2 config and set in CLI",
- ns: NodeServer{
+ ns: &NodeServer{
fuseMountOptions: cliFuseMountOptions,
},
mnt: mounter.VolumeMounter(&mounter.FuseMounter{}),
@@ -115,7 +116,7 @@ func Test_setMountOptions(t *testing.T) {
},
{
name: "KernelMountOptions not set in cluster-2 config and set in CLI",
- ns: NodeServer{
+ ns: &NodeServer{
kernelMountOptions: cliKernelMountOptions,
},
mnt: mounter.VolumeMounter(&mounter.KernelMounter{}),
@@ -126,7 +127,7 @@ func Test_setMountOptions(t *testing.T) {
},
{
name: "FuseMountOptions not set in cluster-1 config and set in CLI",
- ns: NodeServer{
+ ns: &NodeServer{
fuseMountOptions: cliFuseMountOptions,
},
mnt: mounter.VolumeMounter(&mounter.FuseMounter{}),
@@ -146,6 +147,11 @@ func Test_setMountOptions(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
+ driver := &csicommon.CSIDriver{}
+ tc.ns.DefaultNodeServer = csicommon.NewDefaultNodeServer(
+ driver, "cephfs", "", map[string]string{}, map[string]string{},
+ )
+
err := tc.ns.setMountOptions(tc.mnt, tc.volOptions, volCap, tmpConfPath)
if err != nil {
t.Errorf("setMountOptions() = %v", err)
diff --git a/internal/csi-common/nodeserver-default.go b/internal/csi-common/nodeserver-default.go
index 4e6be24b2..3d0bae93a 100644
--- a/internal/csi-common/nodeserver-default.go
+++ b/internal/csi-common/nodeserver-default.go
@@ -31,6 +31,10 @@ type DefaultNodeServer struct {
Driver *CSIDriver
Type string
Mounter mount.Interface
+ // NodeLabels stores the node labels
+ NodeLabels map[string]string
+ // CLIReadAffinityOptions contains map options passed through command line to enable read affinity.
+ CLIReadAffinityOptions string
}
// NodeGetInfo returns node ID.
diff --git a/internal/csi-common/utils.go b/internal/csi-common/utils.go
index 03323af04..080f9df93 100644
--- a/internal/csi-common/utils.go
+++ b/internal/csi-common/utils.go
@@ -55,13 +55,18 @@ func NewVolumeCapabilityAccessMode(mode csi.VolumeCapability_AccessMode_Mode) *c
}
// NewDefaultNodeServer initializes default node server.
-func NewDefaultNodeServer(d *CSIDriver, t string, topology map[string]string) *DefaultNodeServer {
+func NewDefaultNodeServer(
+ d *CSIDriver, t, cliReadAffinityMapOptions string,
+ topology, nodeLabels map[string]string,
+) *DefaultNodeServer {
d.topology = topology
return &DefaultNodeServer{
- Driver: d,
- Type: t,
- Mounter: mount.NewWithoutSystemd(""),
+ Driver: d,
+ Type: t,
+ Mounter: mount.NewWithoutSystemd(""),
+ NodeLabels: nodeLabels,
+ CLIReadAffinityOptions: cliReadAffinityMapOptions,
}
}
diff --git a/internal/nfs/nodeserver/nodeserver.go b/internal/nfs/nodeserver/nodeserver.go
index 7d49fdc39..19baaa80a 100644
--- a/internal/nfs/nodeserver/nodeserver.go
+++ b/internal/nfs/nodeserver/nodeserver.go
@@ -54,7 +54,7 @@ func NewNodeServer(
t string,
) *NodeServer {
return &NodeServer{
- DefaultNodeServer: *csicommon.NewDefaultNodeServer(d, t, map[string]string{}),
+ DefaultNodeServer: *csicommon.NewDefaultNodeServer(d, t, "", map[string]string{}, map[string]string{}),
}
}
diff --git a/internal/rbd/driver/driver.go b/internal/rbd/driver/driver.go
index eda9f03be..4d7061b8e 100644
--- a/internal/rbd/driver/driver.go
+++ b/internal/rbd/driver/driver.go
@@ -71,11 +71,10 @@ func NewNodeServer(
t string,
nodeLabels, topology, crushLocationMap map[string]string,
) (*rbd.NodeServer, error) {
+ cliReadAffinityMapOptions := util.ConstructReadAffinityMapOption(crushLocationMap)
ns := rbd.NodeServer{
- DefaultNodeServer: csicommon.NewDefaultNodeServer(d, t, topology),
- VolumeLocks: util.NewVolumeLocks(),
- NodeLabels: nodeLabels,
- CLIReadAffinityMapOptions: util.ConstructReadAffinityMapOption(crushLocationMap),
+ DefaultNodeServer: csicommon.NewDefaultNodeServer(d, t, cliReadAffinityMapOptions, topology, nodeLabels),
+ VolumeLocks: util.NewVolumeLocks(),
}
return &ns, nil
diff --git a/internal/rbd/nodeserver.go b/internal/rbd/nodeserver.go
index d7961e2bb..773557e0c 100644
--- a/internal/rbd/nodeserver.go
+++ b/internal/rbd/nodeserver.go
@@ -45,10 +45,6 @@ type NodeServer struct {
// A map storing all volumes with ongoing operations so that additional operations
// for that same volume (as defined by VolumeID) return an Aborted error
VolumeLocks *util.VolumeLocks
- // NodeLabels stores the node labels
- NodeLabels map[string]string
- // CLIReadAffinityMapOptions contains map options passed through command line to enable read affinity.
- CLIReadAffinityMapOptions string
}
// stageTransaction struct represents the state a transaction was when it either completed
diff --git a/internal/rbd/nodeserver_test.go b/internal/rbd/nodeserver_test.go
index bffc114dc..59c2c274c 100644
--- a/internal/rbd/nodeserver_test.go
+++ b/internal/rbd/nodeserver_test.go
@@ -22,6 +22,7 @@ import (
"os"
"testing"
+ csicommon "github.com/ceph/ceph-csi/internal/csi-common"
"github.com/ceph/ceph-csi/internal/util"
"github.com/container-storage-interface/spec/lib/go/csi"
@@ -206,6 +207,7 @@ func TestReadAffinity_GetReadAffinityMapOptions(t *testing.T) {
"topology.kubernetes.io/zone": "east-1",
"topology.kubernetes.io/region": "east",
}
+ topology := map[string]string{}
csiConfig := []util.ClusterInfo{
{
@@ -304,11 +306,16 @@ func TestReadAffinity_GetReadAffinityMapOptions(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
crushLocationMap := util.GetCrushLocationMap(tc.CLICrushLocationLabels, nodeLabels)
+ cliReadAffinityMapOptions := util.ConstructReadAffinityMapOption(crushLocationMap)
+ driver := &csicommon.CSIDriver{}
+
ns := &NodeServer{
- CLIReadAffinityMapOptions: util.ConstructReadAffinityMapOption(crushLocationMap),
+ DefaultNodeServer: csicommon.NewDefaultNodeServer(
+ driver, "rbd", cliReadAffinityMapOptions, topology, nodeLabels,
+ ),
}
readAffinityMapOptions, err := util.GetReadAffinityMapOptions(
- tc.clusterID, ns.CLIReadAffinityMapOptions, nodeLabels,
+ tmpConfPath, tc.clusterID, ns.CLIReadAffinityOptions, nodeLabels,
)
if err != nil {
assert.Fail(t, err.Error())
diff --git a/internal/rbd/rbd_attach.go b/internal/rbd/rbd_attach.go
index 37d121d79..dde6a24d3 100644
--- a/internal/rbd/rbd_attach.go
+++ b/internal/rbd/rbd_attach.go
@@ -313,7 +313,7 @@ func (ns *NodeServer) getMapOptions(req *csi.NodeStageVolumeRequest, rv *rbdVolu
}
readAffinityMapOptions, err := util.GetReadAffinityMapOptions(
- rv.ClusterID, ns.CLIReadAffinityMapOptions, ns.NodeLabels,
+ util.CsiConfigFile, rv.ClusterID, ns.CLIReadAffinityOptions, ns.NodeLabels,
)
if err != nil {
return err
diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go
index 0fc782249..08f9f7598 100644
--- a/internal/rbd/rbd_util.go
+++ b/internal/rbd/rbd_util.go
@@ -2143,15 +2143,13 @@ func (rv *rbdVolume) setAllMetadata(parameters map[string]string) error {
func (rv *rbdVolume) unsetAllMetadata(keys []string) error {
for _, key := range keys {
err := rv.RemoveMetadata(key)
- // TODO: replace string comparison with errno.
- if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
+ if err != nil && !errors.Is(err, librbd.ErrNotExist) {
return fmt.Errorf("failed to unset metadata key %q on %q: %w", key, rv, err)
}
}
err := rv.RemoveMetadata(clusterNameKey)
- // TODO: replace string comparison with errno.
- if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
+ if err != nil && !errors.Is(err, librbd.ErrNotExist) {
return fmt.Errorf("failed to unset metadata key %q on %q: %w", clusterNameKey, rv, err)
}
diff --git a/internal/util/read_affinity.go b/internal/util/read_affinity.go
index a62620892..cf802b462 100644
--- a/internal/util/read_affinity.go
+++ b/internal/util/read_affinity.go
@@ -48,7 +48,8 @@ func ConstructReadAffinityMapOption(crushLocationMap map[string]string) string {
// If not, it falls back to returning the `cliReadAffinityMapOptions` from the command line.
// If neither of these options is available, it returns an empty string.
func GetReadAffinityMapOptions(
- clusterID, cliReadAffinityMapOptions string, nodeLabels map[string]string,
+ csiConfigFile, clusterID, cliReadAffinityMapOptions string,
+ nodeLabels map[string]string,
) (string, error) {
var (
err error
@@ -56,7 +57,7 @@ func GetReadAffinityMapOptions(
configCrushLocationLabels string
)
- configReadAffinityEnabled, configCrushLocationLabels, err = GetCrushLocationLabels(CsiConfigFile, clusterID)
+ configReadAffinityEnabled, configCrushLocationLabels, err = GetCrushLocationLabels(csiConfigFile, clusterID)
if err != nil {
return "", err
}
diff --git a/scripts/Dockerfile.devel b/scripts/Dockerfile.devel
index 7e67ef57d..ba6e9fb71 100644
--- a/scripts/Dockerfile.devel
+++ b/scripts/Dockerfile.devel
@@ -28,6 +28,7 @@ RUN dnf -y install \
make \
gcc \
librados-devel \
+ libcephfs-devel \
librbd-devel \
&& dnf -y update \
&& dnf clean all \
diff --git a/scripts/Dockerfile.test b/scripts/Dockerfile.test
index d9d5a7e20..d54049e38 100644
--- a/scripts/Dockerfile.test
+++ b/scripts/Dockerfile.test
@@ -32,6 +32,7 @@ RUN source /build.env \
gcc \
findutils \
librados-devel \
+ libcephfs-devel \
librbd-devel \
openssl \
rubygems \
diff --git a/scripts/install-helm.sh b/scripts/install-helm.sh
index fe4e5d35b..2fe6beebb 100755
--- a/scripts/install-helm.sh
+++ b/scripts/install-helm.sh
@@ -175,7 +175,7 @@ install_cephcsi_helm_charts() {
fi
# install ceph-csi-cephfs and ceph-csi-rbd charts
# shellcheck disable=SC2086
- "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-cephfsplugin-provisioner --set nodeplugin.fullnameOverride=csi-cephfsplugin --set configMapName=ceph-csi-config --set provisioner.replicaCount=1 --set-json='commonLabels={"app.kubernetes.io/name": "ceph-csi-cephfs", "app.kubernetes.io/managed-by": "helm"}' ${SET_SC_TEMPLATE_VALUES} ${CEPHFS_SECRET_TEMPLATE_VALUES} ${CEPHFS_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-cephfs
+ "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-cephfsplugin-provisioner --set nodeplugin.fullnameOverride=csi-cephfsplugin --set configMapName=ceph-csi-config --set provisioner.replicaCount=1 --set-json='commonLabels={"app.kubernetes.io/name": "ceph-csi-cephfs", "app.kubernetes.io/managed-by": "helm"}' ${SET_SC_TEMPLATE_VALUES} ${CEPHFS_SECRET_TEMPLATE_VALUES} ${CEPHFS_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-cephfs --set readAffinity.enabled=true --set readAffinity.crushLocationLabels="{${CRUSH_LOCATION_REGION_LABEL},${CRUSH_LOCATION_ZONE_LABEL}}"
check_deployment_status app=ceph-csi-cephfs "${NAMESPACE}"
check_daemonset_status app=ceph-csi-cephfs "${NAMESPACE}"
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/cephfs.go b/vendor/github.com/ceph/go-ceph/cephfs/cephfs.go
new file mode 100644
index 000000000..e9bd708f2
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/cephfs.go
@@ -0,0 +1,248 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+
+ "github.com/ceph/go-ceph/internal/retry"
+ "github.com/ceph/go-ceph/rados"
+)
+
+// MountInfo exports ceph's ceph_mount_info from libcephfs.cc
+type MountInfo struct {
+ mount *C.struct_ceph_mount_info
+}
+
+func createMount(id *C.char) (*MountInfo, error) {
+ mount := &MountInfo{}
+ ret := C.ceph_create(&mount.mount, id)
+ if ret != 0 {
+ return nil, getError(ret)
+ }
+ return mount, nil
+}
+
+// validate checks whether mount.mount is ready to use or not.
+func (mount *MountInfo) validate() error {
+ if mount.mount == nil {
+ return ErrNotConnected
+ }
+ return nil
+}
+
+// Version returns the major, minor, and patch level of the libcephfs library.
+func Version() (int, int, int) {
+ var cMajor, cMinor, cPatch C.int
+ C.ceph_version(&cMajor, &cMinor, &cPatch)
+ return int(cMajor), int(cMinor), int(cPatch)
+}
+
+// CreateMount creates a mount handle for interacting with Ceph.
+func CreateMount() (*MountInfo, error) {
+ return createMount(nil)
+}
+
+// CreateMountWithId creates a mount handle for interacting with Ceph.
+// The caller can specify a unique id that will identify this client.
+func CreateMountWithId(id string) (*MountInfo, error) {
+ cid := C.CString(id)
+ defer C.free(unsafe.Pointer(cid))
+ return createMount(cid)
+}
+
+// CreateFromRados creates a mount handle using an existing rados cluster
+// connection.
+//
+// Implements:
+//
+// int ceph_create_from_rados(struct ceph_mount_info **cmount, rados_t cluster);
+func CreateFromRados(conn *rados.Conn) (*MountInfo, error) {
+ mount := &MountInfo{}
+ ret := C.ceph_create_from_rados(&mount.mount, C.rados_t(conn.Cluster()))
+ if ret != 0 {
+ return nil, getError(ret)
+ }
+ return mount, nil
+}
+
+// ReadDefaultConfigFile loads the ceph configuration from the default config file.
+//
+// Implements:
+//
+// int ceph_conf_read_file(struct ceph_mount_info *cmount, const char *path_list);
+func (mount *MountInfo) ReadDefaultConfigFile() error {
+ ret := C.ceph_conf_read_file(mount.mount, nil)
+ return getError(ret)
+}
+
+// ReadConfigFile loads the ceph configuration from the specified config file.
+//
+// Implements:
+//
+// int ceph_conf_read_file(struct ceph_mount_info *cmount, const char *path_list);
+func (mount *MountInfo) ReadConfigFile(path string) error {
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ ret := C.ceph_conf_read_file(mount.mount, cPath)
+ return getError(ret)
+}
+
+// ParseConfigArgv configures the mount using a unix style command line
+// argument vector.
+//
+// Implements:
+//
+// int ceph_conf_parse_argv(struct ceph_mount_info *cmount, int argc, const char **argv);
+func (mount *MountInfo) ParseConfigArgv(argv []string) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ if len(argv) == 0 {
+ return ErrEmptyArgument
+ }
+ cargv := make([]*C.char, len(argv))
+ for i := range argv {
+ cargv[i] = C.CString(argv[i])
+ defer C.free(unsafe.Pointer(cargv[i]))
+ }
+
+ ret := C.ceph_conf_parse_argv(mount.mount, C.int(len(cargv)), &cargv[0])
+ return getError(ret)
+}
+
+// ParseDefaultConfigEnv configures the mount from the default Ceph
+// environment variable CEPH_ARGS.
+//
+// Implements:
+//
+// int ceph_conf_parse_env(struct ceph_mount_info *cmount, const char *var);
+func (mount *MountInfo) ParseDefaultConfigEnv() error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ ret := C.ceph_conf_parse_env(mount.mount, nil)
+ return getError(ret)
+}
+
+// SetConfigOption sets the value of the configuration option identified by
+// the given name.
+//
+// Implements:
+//
+// int ceph_conf_set(struct ceph_mount_info *cmount, const char *option, const char *value);
+func (mount *MountInfo) SetConfigOption(option, value string) error {
+ cOption := C.CString(option)
+ defer C.free(unsafe.Pointer(cOption))
+ cValue := C.CString(value)
+ defer C.free(unsafe.Pointer(cValue))
+ return getError(C.ceph_conf_set(mount.mount, cOption, cValue))
+}
+
+// GetConfigOption returns the value of the Ceph configuration option
+// identified by the given name.
+//
+// Implements:
+//
+// int ceph_conf_get(struct ceph_mount_info *cmount, const char *option, char *buf, size_t len);
+func (mount *MountInfo) GetConfigOption(option string) (string, error) {
+ cOption := C.CString(option)
+ defer C.free(unsafe.Pointer(cOption))
+
+ var (
+ err error
+ buf []byte
+ )
+ // range from 4k to 256KiB
+ retry.WithSizes(4096, 1<<18, func(size int) retry.Hint {
+ buf = make([]byte, size)
+ ret := C.ceph_conf_get(
+ mount.mount,
+ cOption,
+ (*C.char)(unsafe.Pointer(&buf[0])),
+ C.size_t(len(buf)))
+ err = getError(ret)
+ return retry.DoubleSize.If(err == errNameTooLong)
+ })
+ if err != nil {
+ return "", err
+ }
+ value := C.GoString((*C.char)(unsafe.Pointer(&buf[0])))
+ return value, nil
+}
+
+// Init the file system client without actually mounting the file system.
+//
+// Implements:
+//
+// int ceph_init(struct ceph_mount_info *cmount);
+func (mount *MountInfo) Init() error {
+ return getError(C.ceph_init(mount.mount))
+}
+
+// Mount the file system, establishing a connection capable of I/O.
+//
+// Implements:
+//
+// int ceph_mount(struct ceph_mount_info *cmount, const char *root);
+func (mount *MountInfo) Mount() error {
+ ret := C.ceph_mount(mount.mount, nil)
+ return getError(ret)
+}
+
+// MountWithRoot mounts the file system using the path provided for the root of
+// the mount. This establishes a connection capable of I/O.
+//
+// Implements:
+//
+// int ceph_mount(struct ceph_mount_info *cmount, const char *root);
+func (mount *MountInfo) MountWithRoot(root string) error {
+ croot := C.CString(root)
+ defer C.free(unsafe.Pointer(croot))
+ return getError(C.ceph_mount(mount.mount, croot))
+}
+
+// Unmount the file system.
+//
+// Implements:
+//
+// int ceph_unmount(struct ceph_mount_info *cmount);
+func (mount *MountInfo) Unmount() error {
+ ret := C.ceph_unmount(mount.mount)
+ return getError(ret)
+}
+
+// Release destroys the mount handle.
+//
+// Implements:
+//
+// int ceph_release(struct ceph_mount_info *cmount);
+func (mount *MountInfo) Release() error {
+ if mount.mount == nil {
+ return nil
+ }
+ ret := C.ceph_release(mount.mount)
+ if err := getError(ret); err != nil {
+ return err
+ }
+ mount.mount = nil
+ return nil
+}
+
+// SyncFs synchronizes all filesystem data to persistent media.
+func (mount *MountInfo) SyncFs() error {
+ ret := C.ceph_sync_fs(mount.mount)
+ return getError(ret)
+}
+
+// IsMounted checks mount status.
+func (mount *MountInfo) IsMounted() bool {
+ ret := C.ceph_is_mounted(mount.mount)
+ return ret == 1
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/command.go b/vendor/github.com/ceph/go-ceph/cephfs/command.go
new file mode 100644
index 000000000..709566a53
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/command.go
@@ -0,0 +1,64 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+
+ "github.com/ceph/go-ceph/internal/cutil"
+)
+
+func cephBufferFree(p unsafe.Pointer) {
+ C.ceph_buffer_free((*C.char)(p))
+}
+
+// MdsCommand sends commands to the specified MDS.
+func (mount *MountInfo) MdsCommand(mdsSpec string, args [][]byte) ([]byte, string, error) {
+ return mount.mdsCommand(mdsSpec, args, nil)
+}
+
+// MdsCommandWithInputBuffer sends commands to the specified MDS, with an input
+// buffer.
+func (mount *MountInfo) MdsCommandWithInputBuffer(mdsSpec string, args [][]byte, inputBuffer []byte) ([]byte, string, error) {
+ return mount.mdsCommand(mdsSpec, args, inputBuffer)
+}
+
+// mdsCommand supports sending formatted commands to MDS.
+//
+// Implements:
+//
+// int ceph_mds_command(struct ceph_mount_info *cmount,
+// const char *mds_spec,
+// const char **cmd,
+// size_t cmdlen,
+// const char *inbuf, size_t inbuflen,
+// char **outbuf, size_t *outbuflen,
+// char **outs, size_t *outslen);
+func (mount *MountInfo) mdsCommand(mdsSpec string, args [][]byte, inputBuffer []byte) (buffer []byte, info string, err error) {
+ spec := C.CString(mdsSpec)
+ defer C.free(unsafe.Pointer(spec))
+ ci := cutil.NewCommandInput(args, inputBuffer)
+ defer ci.Free()
+ co := cutil.NewCommandOutput().SetFreeFunc(cephBufferFree)
+ defer co.Free()
+
+ ret := C.ceph_mds_command(
+ mount.mount, // cephfs mount ref
+ spec, // mds spec
+ (**C.char)(ci.Cmd()),
+ C.size_t(ci.CmdLen()),
+ (*C.char)(ci.InBuf()),
+ C.size_t(ci.InBufLen()),
+ (**C.char)(co.OutBuf()),
+ (*C.size_t)(co.OutBufLen()),
+ (**C.char)(co.Outs()),
+ (*C.size_t)(co.OutsLen()))
+ buf, status := co.GoValues()
+ return buf, status, getError(ret)
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/conn_nautilus.go b/vendor/github.com/ceph/go-ceph/cephfs/conn_nautilus.go
new file mode 100644
index 000000000..53e957ca2
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/conn_nautilus.go
@@ -0,0 +1,30 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+*/
+import "C"
+
+// Some general connectivity and mounting functions are new in
+// Ceph Nautilus.
+
+// GetFsCid returns the cluster ID for a mounted ceph file system.
+// If the object does not refer to a mounted file system, an error
+// will be returned.
+//
+// Note:
+//
+// Only supported in Ceph Nautilus and newer.
+//
+// Implements:
+//
+// int64_t ceph_get_fs_cid(struct ceph_mount_info *cmount);
+func (mount *MountInfo) GetFsCid() (int64, error) {
+ ret := C.ceph_get_fs_cid(mount.mount)
+ if ret < 0 {
+ return 0, getError(C.int(ret))
+ }
+ return int64(ret), nil
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/directory.go b/vendor/github.com/ceph/go-ceph/cephfs/directory.go
new file mode 100644
index 000000000..5a4e2c40a
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/directory.go
@@ -0,0 +1,236 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+)
+
+// Directory represents an open directory handle.
+type Directory struct {
+ mount *MountInfo
+ dir *C.struct_ceph_dir_result
+}
+
+// OpenDir returns a new Directory handle open for I/O.
+//
+// Implements:
+//
+// int ceph_opendir(struct ceph_mount_info *cmount, const char *name, struct ceph_dir_result **dirpp);
+func (mount *MountInfo) OpenDir(path string) (*Directory, error) {
+ var dir *C.struct_ceph_dir_result
+
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_opendir(mount.mount, cPath, &dir)
+ if ret != 0 {
+ return nil, getError(ret)
+ }
+
+ return &Directory{
+ mount: mount,
+ dir: dir,
+ }, nil
+}
+
+// Close the open directory handle.
+//
+// Implements:
+//
+// int ceph_closedir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp);
+func (dir *Directory) Close() error {
+ return getError(C.ceph_closedir(dir.mount.mount, dir.dir))
+}
+
+// Inode represents an inode number in the file system.
+type Inode uint64
+
+// DType values are used to determine, when possible, the file type
+// of a directory entry.
+type DType uint8
+
+const (
+ // DTypeBlk indicates a directory entry is a block device.
+ DTypeBlk = DType(C.DT_BLK)
+ // DTypeChr indicates a directory entry is a character device.
+ DTypeChr = DType(C.DT_CHR)
+ // DTypeDir indicates a directory entry is a directory.
+ DTypeDir = DType(C.DT_DIR)
+ // DTypeFIFO indicates a directory entry is a named pipe (FIFO).
+ DTypeFIFO = DType(C.DT_FIFO)
+ // DTypeLnk indicates a directory entry is a symbolic link.
+ DTypeLnk = DType(C.DT_LNK)
+ // DTypeReg indicates a directory entry is a regular file.
+ DTypeReg = DType(C.DT_REG)
+ // DTypeSock indicates a directory entry is a UNIX domain socket.
+ DTypeSock = DType(C.DT_SOCK)
+ // DTypeUnknown indicates that the file type could not be determined.
+ DTypeUnknown = DType(C.DT_UNKNOWN)
+)
+
+// DirEntry represents an entry within a directory.
+type DirEntry struct {
+ inode Inode
+ name string
+ dtype DType
+}
+
+// Name returns the directory entry's name.
+func (d *DirEntry) Name() string {
+ return d.name
+}
+
+// Inode returns the directory entry's inode number.
+func (d *DirEntry) Inode() Inode {
+ return d.inode
+}
+
+// DType returns the Directory-entry's Type, indicating if it
+// is a regular file, directory, etc.
+// DType may be unknown and thus require an additional call
+// (stat for example) if Unknown.
+func (d *DirEntry) DType() DType {
+ return d.dtype
+}
+
+// DirEntryPlus is a DirEntry plus additional data (stat) for an entry
+// within a directory.
+type DirEntryPlus struct {
+ DirEntry
+ // statx: the converted statx returned by ceph_readdirplus_r
+ statx *CephStatx
+}
+
+// Statx returns cached stat metadata for the directory entry.
+// This call does not incur an actual file system stat.
+func (d *DirEntryPlus) Statx() *CephStatx {
+ return d.statx
+}
+
+// toDirEntry converts a c struct dirent to our go wrapper.
+func toDirEntry(de *C.struct_dirent) *DirEntry {
+ return &DirEntry{
+ inode: Inode(de.d_ino),
+ name: C.GoString(&de.d_name[0]),
+ dtype: DType(de.d_type),
+ }
+}
+
+// toDirEntryPlus converts c structs set by ceph_readdirplus_r to our go
+// wrapper.
+func toDirEntryPlus(de *C.struct_dirent, s C.struct_ceph_statx) *DirEntryPlus {
+ return &DirEntryPlus{
+ DirEntry: *toDirEntry(de),
+ statx: cStructToCephStatx(s),
+ }
+}
+
+// ReadDir reads a single directory entry from the open Directory.
+// A nil DirEntry pointer will be returned when the Directory stream has been
+// exhausted.
+//
+// Implements:
+//
+// int ceph_readdir_r(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, struct dirent *de);
+func (dir *Directory) ReadDir() (*DirEntry, error) {
+ var de C.struct_dirent
+ ret := C.ceph_readdir_r(dir.mount.mount, dir.dir, &de)
+ if ret < 0 {
+ return nil, getError(ret)
+ }
+ if ret == 0 {
+ return nil, nil // End-of-stream
+ }
+ return toDirEntry(&de), nil
+}
+
+// ReadDirPlus reads a single directory entry and stat information from the
+// open Directory.
+// A nil DirEntryPlus pointer will be returned when the Directory stream has
+// been exhausted.
+// See Statx for a description of the wants and flags parameters.
+//
+// Implements:
+//
+// int ceph_readdirplus_r(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, struct dirent *de,
+// struct ceph_statx *stx, unsigned want, unsigned flags, struct Inode **out);
+func (dir *Directory) ReadDirPlus(
+ want StatxMask, flags AtFlags) (*DirEntryPlus, error) {
+
+ var (
+ de C.struct_dirent
+ s C.struct_ceph_statx
+ )
+ ret := C.ceph_readdirplus_r(
+ dir.mount.mount,
+ dir.dir,
+ &de,
+ &s,
+ C.uint(want),
+ C.uint(flags),
+ nil, // unused, internal Inode type not needed for high level api
+ )
+ if ret < 0 {
+ return nil, getError(ret)
+ }
+ if ret == 0 {
+ return nil, nil // End-of-stream
+ }
+ return toDirEntryPlus(&de, s), nil
+}
+
+// RewindDir sets the directory stream to the beginning of the directory.
+//
+// Implements:
+//
+// void ceph_rewinddir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp);
+func (dir *Directory) RewindDir() {
+ C.ceph_rewinddir(dir.mount.mount, dir.dir)
+}
+
+// dirEntries provides a convenient wrapper around slices of DirEntry items.
+// For example, use the Names() call to easily get only the names from a
+// DirEntry slice.
+type dirEntries []*DirEntry
+
+// list returns all the contents of a directory as a dirEntries slice.
+//
+// list is implemented using ReadDir. If any of the calls to ReadDir returns
+// an error List will return an error. However, all previous entries
+// collected will still be returned. Callers of this function may want to check
+// the entries return value even when an error is returned.
+// List rewinds the handle every time it is called to get a full
+// listing of directory contents.
+func (dir *Directory) list() (dirEntries, error) {
+ var (
+ err error
+ entry *DirEntry
+ entries = make(dirEntries, 0)
+ )
+ dir.RewindDir()
+ for {
+ entry, err = dir.ReadDir()
+ if err != nil || entry == nil {
+ break
+ }
+ entries = append(entries, entry)
+ }
+ return entries, err
+}
+
+// names returns a slice of only the name fields from dir entries.
+func (entries dirEntries) names() []string {
+ names := make([]string, len(entries))
+ for i, v := range entries {
+ names[i] = v.Name()
+ }
+ return names
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/doc.go b/vendor/github.com/ceph/go-ceph/cephfs/doc.go
new file mode 100644
index 000000000..3fc2e3cbe
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/doc.go
@@ -0,0 +1,4 @@
+/*
+Package cephfs contains a set of wrappers around Ceph's libcephfs API.
+*/
+package cephfs
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/errors.go b/vendor/github.com/ceph/go-ceph/cephfs/errors.go
new file mode 100644
index 000000000..b139229d5
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/errors.go
@@ -0,0 +1,67 @@
+package cephfs
+
+/*
+#include
+*/
+import "C"
+
+import (
+ "errors"
+
+ "github.com/ceph/go-ceph/internal/errutil"
+)
+
+// cephFSError represents an error condition returned from the CephFS APIs.
+type cephFSError int
+
+// Error returns the error string for the cephFSError type.
+func (e cephFSError) Error() string {
+ return errutil.FormatErrorCode("cephfs", int(e))
+}
+
+func (e cephFSError) ErrorCode() int {
+ return int(e)
+}
+
+func getError(e C.int) error {
+ if e == 0 {
+ return nil
+ }
+ return cephFSError(e)
+}
+
+// getErrorIfNegative converts a ceph return code to error if negative.
+// This is useful for functions that return a usable positive value on
+// success but a negative error number on error.
+func getErrorIfNegative(ret C.int) error {
+ if ret >= 0 {
+ return nil
+ }
+ return getError(ret)
+}
+
+// Public go errors:
+
+var (
+ // ErrEmptyArgument may be returned if a function argument is passed
+ // a zero-length slice or map.
+ ErrEmptyArgument = errors.New("Argument must contain at least one item")
+)
+
+// Public CephFSErrors:
+
+const (
+ // ErrNotConnected may be returned when client is not connected
+ // to a cluster.
+ ErrNotConnected = cephFSError(-C.ENOTCONN)
+ // ErrNotExist indicates a non-specific missing resource.
+ ErrNotExist = cephFSError(-C.ENOENT)
+)
+
+// Private errors:
+
+const (
+ errInvalid = cephFSError(-C.EINVAL)
+ errNameTooLong = cephFSError(-C.ENAMETOOLONG)
+ errRange = cephFSError(-C.ERANGE)
+)
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/file.go b/vendor/github.com/ceph/go-ceph/cephfs/file.go
new file mode 100644
index 000000000..115f63778
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/file.go
@@ -0,0 +1,430 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#define _GNU_SOURCE
+#include
+#include
+#include
+*/
+import "C"
+
+import (
+ "io"
+ "unsafe"
+
+ "github.com/ceph/go-ceph/internal/cutil"
+)
+
+const (
+ // SeekSet is used with Seek to set the absolute position in the file.
+ SeekSet = int(C.SEEK_SET)
+ // SeekCur is used with Seek to position the file relative to the current
+ // position.
+ SeekCur = int(C.SEEK_CUR)
+ // SeekEnd is used with Seek to position the file relative to the end.
+ SeekEnd = int(C.SEEK_END)
+)
+
+// SyncChoice is used to control how metadata and/or data is sync'ed to
+// the file system.
+type SyncChoice int
+
+const (
+ // SyncAll will synchronize both data and metadata.
+ SyncAll = SyncChoice(0)
+ // SyncDataOnly will synchronize only data.
+ SyncDataOnly = SyncChoice(1)
+)
+
+// File represents an open file descriptor in cephfs.
+type File struct {
+ mount *MountInfo
+ fd C.int
+}
+
+// Open a file at the given path. The flags are the same os flags as
+// a local open call. Mode is the same mode bits as a local open call.
+//
+// Implements:
+//
+// int ceph_open(struct ceph_mount_info *cmount, const char *path, int flags, mode_t mode);
+func (mount *MountInfo) Open(path string, flags int, mode uint32) (*File, error) {
+ if mount.mount == nil {
+ return nil, ErrNotConnected
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ ret := C.ceph_open(mount.mount, cPath, C.int(flags), C.mode_t(mode))
+ if ret < 0 {
+ return nil, getError(ret)
+ }
+ return &File{mount: mount, fd: ret}, nil
+}
+
+func (f *File) validate() error {
+ if f.mount == nil {
+ return ErrNotConnected
+ }
+ return nil
+}
+
+// Close the file.
+//
+// Implements:
+//
+// int ceph_close(struct ceph_mount_info *cmount, int fd);
+func (f *File) Close() error {
+ if f.fd == -1 {
+ // already closed
+ return nil
+ }
+ if err := f.validate(); err != nil {
+ return err
+ }
+ if err := getError(C.ceph_close(f.mount.mount, f.fd)); err != nil {
+ return err
+ }
+ f.fd = -1
+ return nil
+}
+
+// read directly wraps the ceph_read call. Because read is such a common
+// operation we deviate from the ceph naming and expose Read and ReadAt
+// wrappers for external callers of the library.
+//
+// Implements:
+//
+// int ceph_read(struct ceph_mount_info *cmount, int fd, char *buf, int64_t size, int64_t offset);
+func (f *File) read(buf []byte, offset int64) (int, error) {
+ if err := f.validate(); err != nil {
+ return 0, err
+ }
+ bufptr := (*C.char)(unsafe.Pointer(&buf[0]))
+ ret := C.ceph_read(
+ f.mount.mount, f.fd, bufptr, C.int64_t(len(buf)), C.int64_t(offset))
+ switch {
+ case ret < 0:
+ return 0, getError(ret)
+ case ret == 0:
+ return 0, io.EOF
+ }
+ return int(ret), nil
+}
+
+// Read data from file. Up to len(buf) bytes will be read from the file.
+// The number of bytes read will be returned.
+// When nothing is left to read from the file, Read returns, 0, io.EOF.
+func (f *File) Read(buf []byte) (int, error) {
+ // to-consider: should we mimic Go's behavior of returning an
+ // io.ErrShortWrite error if write length < buf size?
+ return f.read(buf, -1)
+}
+
+// ReadAt will read data from the file starting at the given offset.
+// Up to len(buf) bytes will be read from the file.
+// The number of bytes read will be returned.
+// When nothing is left to read from the file, ReadAt returns, 0, io.EOF.
+func (f *File) ReadAt(buf []byte, offset int64) (int, error) {
+ if offset < 0 {
+ return 0, errInvalid
+ }
+ return f.read(buf, offset)
+}
+
+// Preadv will read data from the file, starting at the given offset,
+// into the byte-slice data buffers sequentially.
+// The number of bytes read will be returned.
+// When nothing is left to read from the file the return values will be:
+// 0, io.EOF.
+//
+// Implements:
+//
+// int ceph_preadv(struct ceph_mount_info *cmount, int fd, const struct iovec *iov, int iovcnt,
+// int64_t offset);
+func (f *File) Preadv(data [][]byte, offset int64) (int, error) {
+ if err := f.validate(); err != nil {
+ return 0, err
+ }
+ iov := cutil.ByteSlicesToIovec(data)
+ defer iov.Free()
+
+ ret := C.ceph_preadv(
+ f.mount.mount,
+ f.fd,
+ (*C.struct_iovec)(iov.Pointer()),
+ C.int(iov.Len()),
+ C.int64_t(offset))
+ switch {
+ case ret < 0:
+ return 0, getError(ret)
+ case ret == 0:
+ return 0, io.EOF
+ }
+ iov.Sync()
+ return int(ret), nil
+}
+
+// write directly wraps the ceph_write call. Because write is such a common
+// operation we deviate from the ceph naming and expose Write and WriteAt
+// wrappers for external callers of the library.
+//
+// Implements:
+//
+// int ceph_write(struct ceph_mount_info *cmount, int fd, const char *buf,
+// int64_t size, int64_t offset);
+func (f *File) write(buf []byte, offset int64) (int, error) {
+ if err := f.validate(); err != nil {
+ return 0, err
+ }
+ bufptr := (*C.char)(unsafe.Pointer(&buf[0]))
+ ret := C.ceph_write(
+ f.mount.mount, f.fd, bufptr, C.int64_t(len(buf)), C.int64_t(offset))
+ if ret < 0 {
+ return 0, getError(ret)
+ }
+ return int(ret), nil
+}
+
+// Write data from buf to the file.
+// The number of bytes written is returned.
+func (f *File) Write(buf []byte) (int, error) {
+ return f.write(buf, -1)
+}
+
+// WriteAt writes data from buf to the file at the specified offset.
+// The number of bytes written is returned.
+func (f *File) WriteAt(buf []byte, offset int64) (int, error) {
+ if offset < 0 {
+ return 0, errInvalid
+ }
+ return f.write(buf, offset)
+}
+
+// Pwritev writes data from the slice of byte-slice buffers to the file at the
+// specified offset.
+// The number of bytes written is returned.
+//
+// Implements:
+//
+// int ceph_pwritev(struct ceph_mount_info *cmount, int fd, const struct iovec *iov, int iovcnt,
+// int64_t offset);
+func (f *File) Pwritev(data [][]byte, offset int64) (int, error) {
+ if err := f.validate(); err != nil {
+ return 0, err
+ }
+ iov := cutil.ByteSlicesToIovec(data)
+ defer iov.Free()
+
+ ret := C.ceph_pwritev(
+ f.mount.mount,
+ f.fd,
+ (*C.struct_iovec)(iov.Pointer()),
+ C.int(iov.Len()),
+ C.int64_t(offset))
+ if ret < 0 {
+ return 0, getError(ret)
+ }
+ return int(ret), nil
+}
+
+// Seek will reposition the file stream based on the given offset.
+//
+// Implements:
+//
+// int64_t ceph_lseek(struct ceph_mount_info *cmount, int fd, int64_t offset, int whence);
+func (f *File) Seek(offset int64, whence int) (int64, error) {
+ if err := f.validate(); err != nil {
+ return 0, err
+ }
+ // validate the seek whence value in case the caller skews
+ // from the seek values we technically support from C as documented.
+ // TODO: need to support seek-(hole|data) in mimic and later.
+ switch whence {
+ case SeekSet, SeekCur, SeekEnd:
+ default:
+ return 0, errInvalid
+ }
+
+ ret := C.ceph_lseek(f.mount.mount, f.fd, C.int64_t(offset), C.int(whence))
+ if ret < 0 {
+ return 0, getError(C.int(ret))
+ }
+ return int64(ret), nil
+}
+
+// Fchmod changes the mode bits (permissions) of a file.
+//
+// Implements:
+//
+// int ceph_fchmod(struct ceph_mount_info *cmount, int fd, mode_t mode);
+func (f *File) Fchmod(mode uint32) error {
+ if err := f.validate(); err != nil {
+ return err
+ }
+
+ ret := C.ceph_fchmod(f.mount.mount, f.fd, C.mode_t(mode))
+ return getError(ret)
+}
+
+// Fchown changes the ownership of a file.
+//
+// Implements:
+//
+// int ceph_fchown(struct ceph_mount_info *cmount, int fd, int uid, int gid);
+func (f *File) Fchown(user uint32, group uint32) error {
+ if err := f.validate(); err != nil {
+ return err
+ }
+
+ ret := C.ceph_fchown(f.mount.mount, f.fd, C.int(user), C.int(group))
+ return getError(ret)
+}
+
+// Fstatx returns information about an open file.
+//
+// Implements:
+//
+// int ceph_fstatx(struct ceph_mount_info *cmount, int fd, struct ceph_statx *stx,
+// unsigned int want, unsigned int flags);
+func (f *File) Fstatx(want StatxMask, flags AtFlags) (*CephStatx, error) {
+ if err := f.validate(); err != nil {
+ return nil, err
+ }
+
+ var stx C.struct_ceph_statx
+ ret := C.ceph_fstatx(
+ f.mount.mount,
+ f.fd,
+ &stx,
+ C.uint(want),
+ C.uint(flags),
+ )
+ if err := getError(ret); err != nil {
+ return nil, err
+ }
+ return cStructToCephStatx(stx), nil
+}
+
+// FallocFlags represent flags which determine the operation to be
+// performed on the given range.
+// CephFS supports only following two flags.
+type FallocFlags int
+
+const (
+ // FallocNoFlag means default option.
+ FallocNoFlag = FallocFlags(0)
+ // FallocFlKeepSize specifies that the file size will not be changed.
+ FallocFlKeepSize = FallocFlags(C.FALLOC_FL_KEEP_SIZE)
+ // FallocFlPunchHole specifies that the operation is to deallocate
+ // space and zero the byte range.
+ FallocFlPunchHole = FallocFlags(C.FALLOC_FL_PUNCH_HOLE)
+)
+
+// Fallocate preallocates or releases disk space for the file for the
+// given byte range, the flags determine the operation to be performed
+// on the given range.
+//
+// Implements:
+//
+// int ceph_fallocate(struct ceph_mount_info *cmount, int fd, int mode,
+// int64_t offset, int64_t length);
+func (f *File) Fallocate(mode FallocFlags, offset, length int64) error {
+ if err := f.validate(); err != nil {
+ return err
+ }
+ ret := C.ceph_fallocate(f.mount.mount, f.fd, C.int(mode), C.int64_t(offset), C.int64_t(length))
+ return getError(ret)
+}
+
+// LockOp determines operations/type of locks which can be applied on a file.
+type LockOp int
+
+const (
+ // LockSH places a shared lock.
+ // More than one process may hold a shared lock for a given file at a given time.
+ LockSH = LockOp(C.LOCK_SH)
+ // LockEX places an exclusive lock.
+ // Only one process may hold an exclusive lock for a given file at a given time.
+ LockEX = LockOp(C.LOCK_EX)
+ // LockUN removes an existing lock held by this process.
+ LockUN = LockOp(C.LOCK_UN)
+ // LockNB can be ORed with any of the above to make a nonblocking call.
+ LockNB = LockOp(C.LOCK_NB)
+)
+
+// Flock applies or removes an advisory lock on an open file.
+// Param owner is the user-supplied identifier for the owner of the
+// lock, must be an arbitrary integer.
+//
+// Implements:
+//
+// int ceph_flock(struct ceph_mount_info *cmount, int fd, int operation, uint64_t owner);
+func (f *File) Flock(operation LockOp, owner uint64) error {
+ if err := f.validate(); err != nil {
+ return err
+ }
+
+ // validate the operation values before passing it on.
+ switch operation &^ LockNB {
+ case LockSH, LockEX, LockUN:
+ default:
+ return errInvalid
+ }
+
+ ret := C.ceph_flock(f.mount.mount, f.fd, C.int(operation), C.uint64_t(owner))
+ return getError(ret)
+}
+
+// Fsync ensures the file content that may be cached is committed to stable
+// storage.
+// Pass SyncAll to have this call behave like standard fsync and synchronize
+// all data and metadata.
+// Pass SyncDataOnly to have this call behave more like fdatasync (on linux).
+//
+// Implements:
+//
+// int ceph_fsync(struct ceph_mount_info *cmount, int fd, int syncdataonly);
+func (f *File) Fsync(sync SyncChoice) error {
+ if err := f.validate(); err != nil {
+ return err
+ }
+
+ ret := C.ceph_fsync(
+ f.mount.mount,
+ f.fd,
+ C.int(sync),
+ )
+ return getError(ret)
+}
+
+// Sync ensures the file content that may be cached is committed to stable
+// storage.
+// Sync behaves like Go's os package File.Sync function.
+func (f *File) Sync() error {
+ return f.Fsync(SyncAll)
+}
+
+// Truncate sets the size of the open file.
+// NOTE: In some versions of ceph a bug exists where calling ftruncate on a
+// file open for read-only is permitted. The go-ceph wrapper does no additional
+// checking and will inherit the issue on affected versions of ceph. Please
+// refer to the following issue for details:
+// https://tracker.ceph.com/issues/48202
+//
+// Implements:
+//
+// int ceph_ftruncate(struct ceph_mount_info *cmount, int fd, int64_t size);
+func (f *File) Truncate(size int64) error {
+ if err := f.validate(); err != nil {
+ return err
+ }
+
+ ret := C.ceph_ftruncate(
+ f.mount.mount,
+ f.fd,
+ C.int64_t(size),
+ )
+ return getError(ret)
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/file_ops.go b/vendor/github.com/ceph/go-ceph/cephfs/file_ops.go
new file mode 100644
index 000000000..4c3c5e34a
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/file_ops.go
@@ -0,0 +1,132 @@
+//go:build !nautilus
+// +build !nautilus
+
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+#include
+#include
+*/
+import "C"
+
+import (
+ ts "github.com/ceph/go-ceph/internal/timespec"
+ "unsafe"
+)
+
+// Mknod creates a regular, block or character special file.
+//
+// Implements:
+//
+// int ceph_mknod(struct ceph_mount_info *cmount, const char *path, mode_t mode,
+// dev_t rdev);
+func (mount *MountInfo) Mknod(path string, mode uint16, dev uint16) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_mknod(mount.mount, cPath, C.mode_t(mode), C.dev_t(dev))
+ return getError(ret)
+}
+
+// Utime struct is the equivalent of C.struct_utimbuf
+type Utime struct {
+ // AcTime represents the file's access time in seconds since the Unix epoch.
+ AcTime int64
+ // ModTime represents the file's modification time in seconds since the Unix epoch.
+ ModTime int64
+}
+
+// Futime changes file/directory last access and modification times.
+//
+// Implements:
+//
+// int ceph_futime(struct ceph_mount_info *cmount, int fd, struct utimbuf *buf);
+func (mount *MountInfo) Futime(fd int, times *Utime) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+
+ cFd := C.int(fd)
+ uTimeBuf := &C.struct_utimbuf{
+ actime: C.time_t(times.AcTime),
+ modtime: C.time_t(times.ModTime),
+ }
+
+ ret := C.ceph_futime(mount.mount, cFd, uTimeBuf)
+ return getError(ret)
+}
+
+// Timeval struct is the go equivalent of C.struct_timeval type
+type Timeval struct {
+ // Sec represents seconds
+ Sec int64
+ // USec represents microseconds
+ USec int64
+}
+
+// Futimens changes file/directory last access and modification times, here times param
+// is an array of Timespec struct having length 2, where times[0] represents the access time
+// and times[1] represents the modification time.
+//
+// Implements:
+//
+// int ceph_futimens(struct ceph_mount_info *cmount, int fd, struct timespec times[2]);
+func (mount *MountInfo) Futimens(fd int, times []Timespec) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+
+ if len(times) != 2 {
+ return getError(-C.EINVAL)
+ }
+
+ cFd := C.int(fd)
+ cTimes := []C.struct_timespec{}
+ for _, val := range times {
+ cTs := &C.struct_timespec{}
+ ts.CopyToCStruct(
+ ts.Timespec(val),
+ ts.CTimespecPtr(cTs),
+ )
+ cTimes = append(cTimes, *cTs)
+ }
+
+ ret := C.ceph_futimens(mount.mount, cFd, &cTimes[0])
+ return getError(ret)
+}
+
+// Futimes changes file/directory last access and modification times, here times param
+// is an array of Timeval struct type having length 2, where times[0] represents the access time
+// and times[1] represents the modification time.
+//
+// Implements:
+//
+// int ceph_futimes(struct ceph_mount_info *cmount, int fd, struct timeval times[2]);
+func (mount *MountInfo) Futimes(fd int, times []Timeval) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+
+ if len(times) != 2 {
+ return getError(-C.EINVAL)
+ }
+
+ cFd := C.int(fd)
+ cTimes := []C.struct_timeval{}
+ for _, val := range times {
+ cTimes = append(cTimes, C.struct_timeval{
+ tv_sec: C.time_t(val.Sec),
+ tv_usec: C.suseconds_t(val.USec),
+ })
+ }
+
+ ret := C.ceph_futimes(mount.mount, cFd, &cTimes[0])
+ return getError(ret)
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/file_xattr.go b/vendor/github.com/ceph/go-ceph/cephfs/file_xattr.go
new file mode 100644
index 000000000..4a2da530c
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/file_xattr.go
@@ -0,0 +1,163 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#define _GNU_SOURCE
+#include
+#include
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+
+ "github.com/ceph/go-ceph/internal/cutil"
+ "github.com/ceph/go-ceph/internal/retry"
+)
+
+// XattrFlags are used to control the behavior of set-xattr calls.
+type XattrFlags int
+
+const (
+ // XattrDefault specifies that set-xattr calls use the default behavior of
+ // creating or updating an xattr.
+ XattrDefault = XattrFlags(0)
+ // XattrCreate specifies that set-xattr calls only set new xattrs.
+ XattrCreate = XattrFlags(C.XATTR_CREATE)
+ // XattrReplace specifies that set-xattr calls only replace existing xattr
+ // values.
+ XattrReplace = XattrFlags(C.XATTR_REPLACE)
+)
+
+// SetXattr sets an extended attribute on the open file.
+//
+// NOTE: Attempting to set an xattr value with an empty value may cause the
+// xattr to be unset on some older versions of ceph.
+// Please refer to https://tracker.ceph.com/issues/46084
+//
+// Implements:
+//
+// int ceph_fsetxattr(struct ceph_mount_info *cmount, int fd, const char *name,
+// const void *value, size_t size, int flags);
+func (f *File) SetXattr(name string, value []byte, flags XattrFlags) error {
+ if err := f.validate(); err != nil {
+ return err
+ }
+ if name == "" {
+ return errInvalid
+ }
+ var vptr unsafe.Pointer
+ if len(value) > 0 {
+ vptr = unsafe.Pointer(&value[0])
+ }
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ ret := C.ceph_fsetxattr(
+ f.mount.mount,
+ f.fd,
+ cName,
+ vptr,
+ C.size_t(len(value)),
+ C.int(flags))
+ return getError(ret)
+}
+
+// GetXattr gets an extended attribute from the open file.
+//
+// Implements:
+//
+// int ceph_fgetxattr(struct ceph_mount_info *cmount, int fd, const char *name,
+// void *value, size_t size);
+func (f *File) GetXattr(name string) ([]byte, error) {
+ if err := f.validate(); err != nil {
+ return nil, err
+ }
+ if name == "" {
+ return nil, errInvalid
+ }
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ var (
+ ret C.int
+ err error
+ buf []byte
+ )
+ // range from 1k to 64KiB
+ retry.WithSizes(1024, 1<<16, func(size int) retry.Hint {
+ buf = make([]byte, size)
+ ret = C.ceph_fgetxattr(
+ f.mount.mount,
+ f.fd,
+ cName,
+ unsafe.Pointer(&buf[0]),
+ C.size_t(size))
+ err = getErrorIfNegative(ret)
+ return retry.DoubleSize.If(err == errRange)
+ })
+ if err != nil {
+ return nil, err
+ }
+ return buf[:ret], nil
+}
+
+// ListXattr returns a slice containing strings for the name of each xattr set
+// on the file.
+//
+// Implements:
+//
+// int ceph_flistxattr(struct ceph_mount_info *cmount, int fd, char *list, size_t size);
+func (f *File) ListXattr() ([]string, error) {
+ if err := f.validate(); err != nil {
+ return nil, err
+ }
+
+ var (
+ ret C.int
+ err error
+ buf []byte
+ )
+ // range from 1k to 64KiB
+ retry.WithSizes(1024, 1<<16, func(size int) retry.Hint {
+ buf = make([]byte, size)
+ ret = C.ceph_flistxattr(
+ f.mount.mount,
+ f.fd,
+ (*C.char)(unsafe.Pointer(&buf[0])),
+ C.size_t(size))
+ err = getErrorIfNegative(ret)
+ return retry.DoubleSize.If(err == errRange)
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ names := cutil.SplitSparseBuffer(buf[:ret])
+ return names, nil
+}
+
+// RemoveXattr removes the named xattr from the open file.
+//
+// Implements:
+//
+// int ceph_fremovexattr(struct ceph_mount_info *cmount, int fd, const char *name);
+func (f *File) RemoveXattr(name string) error {
+ if err := f.validate(); err != nil {
+ return err
+ }
+ if name == "" {
+ return errInvalid
+ }
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ ret := C.ceph_fremovexattr(
+ f.mount.mount,
+ f.fd,
+ cName)
+ return getError(ret)
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/makedirs.go b/vendor/github.com/ceph/go-ceph/cephfs/makedirs.go
new file mode 100644
index 000000000..24b0c34ab
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/makedirs.go
@@ -0,0 +1,29 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+)
+
+// MakeDirs creates multiple directories at once.
+//
+// Implements:
+//
+// int ceph_mkdirs(struct ceph_mount_info *cmount, const char *path, mode_t mode);
+func (mount *MountInfo) MakeDirs(path string, mode uint32) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_mkdirs(mount.mount, cPath, C.mode_t(mode))
+ return getError(ret)
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/mount_perms_mimic.go b/vendor/github.com/ceph/go-ceph/cephfs/mount_perms_mimic.go
new file mode 100644
index 000000000..83c8c8df9
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/mount_perms_mimic.go
@@ -0,0 +1,22 @@
+//
+// ceph_mount_perms_set available in mimic & later
+
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+*/
+import "C"
+
+// SetMountPerms applies the given UserPerm to the mount object, which it will
+// then use to define the connection's ownership credentials.
+// This function must be called after Init but before Mount.
+//
+// Implements:
+//
+// int ceph_mount_perms_set(struct ceph_mount_info *cmount, UserPerm *perm);
+func (mount *MountInfo) SetMountPerms(perm *UserPerm) error {
+ return getError(C.ceph_mount_perms_set(mount.mount, perm.userPerm))
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/path.go b/vendor/github.com/ceph/go-ceph/cephfs/path.go
new file mode 100644
index 000000000..9bb608c77
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/path.go
@@ -0,0 +1,201 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+)
+
+// CurrentDir gets the current working directory.
+func (mount *MountInfo) CurrentDir() string {
+ if err := mount.validate(); err != nil {
+ return ""
+ }
+ cDir := C.ceph_getcwd(mount.mount)
+ return C.GoString(cDir)
+}
+
+// ChangeDir changes the current working directory.
+func (mount *MountInfo) ChangeDir(path string) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_chdir(mount.mount, cPath)
+ return getError(ret)
+}
+
+// MakeDir creates a directory.
+func (mount *MountInfo) MakeDir(path string, mode uint32) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_mkdir(mount.mount, cPath, C.mode_t(mode))
+ return getError(ret)
+}
+
+// RemoveDir removes a directory.
+func (mount *MountInfo) RemoveDir(path string) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_rmdir(mount.mount, cPath)
+ return getError(ret)
+}
+
+// Unlink removes a file.
+//
+// Implements:
+//
+// int ceph_unlink(struct ceph_mount_info *cmount, const char *path);
+func (mount *MountInfo) Unlink(path string) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_unlink(mount.mount, cPath)
+ return getError(ret)
+}
+
+// Link creates a new link to an existing file.
+//
+// Implements:
+//
+// int ceph_link (struct ceph_mount_info *cmount, const char *existing, const char *newname);
+func (mount *MountInfo) Link(oldname, newname string) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ cOldname := C.CString(oldname)
+ defer C.free(unsafe.Pointer(cOldname))
+
+ cNewname := C.CString(newname)
+ defer C.free(unsafe.Pointer(cNewname))
+
+ ret := C.ceph_link(mount.mount, cOldname, cNewname)
+ return getError(ret)
+}
+
+// Symlink creates a symbolic link to an existing path.
+//
+// Implements:
+//
+// int ceph_symlink(struct ceph_mount_info *cmount, const char *existing, const char *newname);
+func (mount *MountInfo) Symlink(existing, newname string) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ cExisting := C.CString(existing)
+ defer C.free(unsafe.Pointer(cExisting))
+
+ cNewname := C.CString(newname)
+ defer C.free(unsafe.Pointer(cNewname))
+
+ ret := C.ceph_symlink(mount.mount, cExisting, cNewname)
+ return getError(ret)
+}
+
+// Readlink returns the value of a symbolic link.
+//
+// Implements:
+//
+// int ceph_readlink(struct ceph_mount_info *cmount, const char *path, char *buf, int64_t size);
+func (mount *MountInfo) Readlink(path string) (string, error) {
+ if err := mount.validate(); err != nil {
+ return "", err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ buf := make([]byte, 4096)
+ ret := C.ceph_readlink(mount.mount,
+ cPath,
+ (*C.char)(unsafe.Pointer(&buf[0])),
+ C.int64_t(len(buf)))
+ if ret < 0 {
+ return "", getError(ret)
+ }
+
+ return string(buf[:ret]), nil
+}
+
+// Statx returns information about a file/directory.
+//
+// Implements:
+//
+// int ceph_statx(struct ceph_mount_info *cmount, const char *path, struct ceph_statx *stx,
+// unsigned int want, unsigned int flags);
+func (mount *MountInfo) Statx(path string, want StatxMask, flags AtFlags) (*CephStatx, error) {
+ if err := mount.validate(); err != nil {
+ return nil, err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ var stx C.struct_ceph_statx
+ ret := C.ceph_statx(
+ mount.mount,
+ cPath,
+ &stx,
+ C.uint(want),
+ C.uint(flags),
+ )
+ if err := getError(ret); err != nil {
+ return nil, err
+ }
+ return cStructToCephStatx(stx), nil
+}
+
+// Rename a file or directory.
+//
+// Implements:
+//
+// int ceph_rename(struct ceph_mount_info *cmount, const char *from, const char *to);
+func (mount *MountInfo) Rename(from, to string) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ cFrom := C.CString(from)
+ defer C.free(unsafe.Pointer(cFrom))
+ cTo := C.CString(to)
+ defer C.free(unsafe.Pointer(cTo))
+
+ ret := C.ceph_rename(mount.mount, cFrom, cTo)
+ return getError(ret)
+}
+
+// Truncate sets the size of the specified file.
+//
+// Implements:
+//
+// int ceph_truncate(struct ceph_mount_info *cmount, const char *path, int64_t size);
+func (mount *MountInfo) Truncate(path string, size int64) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_truncate(
+ mount.mount,
+ cPath,
+ C.int64_t(size),
+ )
+ return getError(ret)
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/path_xattr.go b/vendor/github.com/ceph/go-ceph/cephfs/path_xattr.go
new file mode 100644
index 000000000..0c0534f52
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/path_xattr.go
@@ -0,0 +1,291 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#define _GNU_SOURCE
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+
+ "github.com/ceph/go-ceph/internal/cutil"
+ "github.com/ceph/go-ceph/internal/retry"
+)
+
+// SetXattr sets an extended attribute on the file at the supplied path.
+//
+// NOTE: Attempting to set an xattr value with an empty value may cause
+// the xattr to be unset. Please refer to https://tracker.ceph.com/issues/46084
+//
+// Implements:
+//
+// int ceph_setxattr(struct ceph_mount_info *cmount, const char *path, const char *name,
+// const void *value, size_t size, int flags);
+func (mount *MountInfo) SetXattr(path, name string, value []byte, flags XattrFlags) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ if name == "" {
+ return errInvalid
+ }
+ var vptr unsafe.Pointer
+ if len(value) > 0 {
+ vptr = unsafe.Pointer(&value[0])
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ ret := C.ceph_setxattr(
+ mount.mount,
+ cPath,
+ cName,
+ vptr,
+ C.size_t(len(value)),
+ C.int(flags))
+ return getError(ret)
+}
+
+// GetXattr gets an extended attribute from the file at the supplied path.
+//
+// Implements:
+//
+// int ceph_getxattr(struct ceph_mount_info *cmount, const char *path, const char *name,
+// void *value, size_t size);
+func (mount *MountInfo) GetXattr(path, name string) ([]byte, error) {
+ if err := mount.validate(); err != nil {
+ return nil, err
+ }
+ if name == "" {
+ return nil, errInvalid
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ var (
+ ret C.int
+ err error
+ buf []byte
+ )
+ // range from 1k to 64KiB
+ retry.WithSizes(1024, 1<<16, func(size int) retry.Hint {
+ buf = make([]byte, size)
+ ret = C.ceph_getxattr(
+ mount.mount,
+ cPath,
+ cName,
+ unsafe.Pointer(&buf[0]),
+ C.size_t(size))
+ err = getErrorIfNegative(ret)
+ return retry.DoubleSize.If(err == errRange)
+ })
+ if err != nil {
+ return nil, err
+ }
+ return buf[:ret], nil
+}
+
+// ListXattr returns a slice containing strings for the name of each xattr set
+// on the file at the supplied path.
+//
+// Implements:
+//
+// int ceph_listxattr(struct ceph_mount_info *cmount, const char *path, char *list, size_t size);
+func (mount *MountInfo) ListXattr(path string) ([]string, error) {
+ if err := mount.validate(); err != nil {
+ return nil, err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ var (
+ ret C.int
+ err error
+ buf []byte
+ )
+ // range from 1k to 64KiB
+ retry.WithSizes(1024, 1<<16, func(size int) retry.Hint {
+ buf = make([]byte, size)
+ ret = C.ceph_listxattr(
+ mount.mount,
+ cPath,
+ (*C.char)(unsafe.Pointer(&buf[0])),
+ C.size_t(size))
+ err = getErrorIfNegative(ret)
+ return retry.DoubleSize.If(err == errRange)
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ names := cutil.SplitSparseBuffer(buf[:ret])
+ return names, nil
+}
+
+// RemoveXattr removes the named xattr from the open file.
+//
+// Implements:
+//
+// int ceph_removexattr(struct ceph_mount_info *cmount, const char *path, const char *name);
+func (mount *MountInfo) RemoveXattr(path, name string) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ if name == "" {
+ return errInvalid
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ ret := C.ceph_removexattr(
+ mount.mount,
+ cPath,
+ cName)
+ return getError(ret)
+}
+
+// LsetXattr sets an extended attribute on the file at the supplied path.
+//
+// NOTE: Attempting to set an xattr value with an empty value may cause
+// the xattr to be unset. Please refer to https://tracker.ceph.com/issues/46084
+//
+// Implements:
+//
+// int ceph_lsetxattr(struct ceph_mount_info *cmount, const char *path, const char *name,
+// const void *value, size_t size, int flags);
+func (mount *MountInfo) LsetXattr(path, name string, value []byte, flags XattrFlags) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ if name == "" {
+ return errInvalid
+ }
+ var vptr unsafe.Pointer
+ if len(value) > 0 {
+ vptr = unsafe.Pointer(&value[0])
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ ret := C.ceph_lsetxattr(
+ mount.mount,
+ cPath,
+ cName,
+ vptr,
+ C.size_t(len(value)),
+ C.int(flags))
+ return getError(ret)
+}
+
+// LgetXattr gets an extended attribute from the file at the supplied path.
+//
+// Implements:
+//
+// int ceph_lgetxattr(struct ceph_mount_info *cmount, const char *path, const char *name,
+// void *value, size_t size);
+func (mount *MountInfo) LgetXattr(path, name string) ([]byte, error) {
+ if err := mount.validate(); err != nil {
+ return nil, err
+ }
+ if name == "" {
+ return nil, errInvalid
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ var (
+ ret C.int
+ err error
+ buf []byte
+ )
+ // range from 1k to 64KiB
+ retry.WithSizes(1024, 1<<16, func(size int) retry.Hint {
+ buf = make([]byte, size)
+ ret = C.ceph_lgetxattr(
+ mount.mount,
+ cPath,
+ cName,
+ unsafe.Pointer(&buf[0]),
+ C.size_t(size))
+ err = getErrorIfNegative(ret)
+ return retry.DoubleSize.If(err == errRange)
+ })
+ if err != nil {
+ return nil, err
+ }
+ return buf[:ret], nil
+}
+
+// LlistXattr returns a slice containing strings for the name of each xattr set
+// on the file at the supplied path.
+//
+// Implements:
+//
+// int ceph_llistxattr(struct ceph_mount_info *cmount, const char *path, char *list, size_t size);
+func (mount *MountInfo) LlistXattr(path string) ([]string, error) {
+ if err := mount.validate(); err != nil {
+ return nil, err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ var (
+ ret C.int
+ err error
+ buf []byte
+ )
+ // range from 1k to 64KiB
+ retry.WithSizes(1024, 1<<16, func(size int) retry.Hint {
+ buf = make([]byte, size)
+ ret = C.ceph_llistxattr(
+ mount.mount,
+ cPath,
+ (*C.char)(unsafe.Pointer(&buf[0])),
+ C.size_t(size))
+ err = getErrorIfNegative(ret)
+ return retry.DoubleSize.If(err == errRange)
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ names := cutil.SplitSparseBuffer(buf[:ret])
+ return names, nil
+}
+
+// LremoveXattr removes the named xattr from the open file.
+//
+// Implements:
+//
+// int ceph_lremovexattr(struct ceph_mount_info *cmount, const char *path, const char *name);
+func (mount *MountInfo) LremoveXattr(path, name string) error {
+ if err := mount.validate(); err != nil {
+ return err
+ }
+ if name == "" {
+ return errInvalid
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ ret := C.ceph_lremovexattr(
+ mount.mount,
+ cPath,
+ cName)
+ return getError(ret)
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/permissions.go b/vendor/github.com/ceph/go-ceph/cephfs/permissions.go
new file mode 100644
index 000000000..d55470bef
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/permissions.go
@@ -0,0 +1,40 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+)
+
+// Chmod changes the mode bits (permissions) of a file/directory.
+func (mount *MountInfo) Chmod(path string, mode uint32) error {
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_chmod(mount.mount, cPath, C.mode_t(mode))
+ return getError(ret)
+}
+
+// Chown changes the ownership of a file/directory.
+func (mount *MountInfo) Chown(path string, user uint32, group uint32) error {
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_chown(mount.mount, cPath, C.int(user), C.int(group))
+ return getError(ret)
+}
+
+// Lchown changes the ownership of a file/directory/etc without following symbolic links
+func (mount *MountInfo) Lchown(path string, user uint32, group uint32) error {
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ ret := C.ceph_lchown(mount.mount, cPath, C.int(user), C.int(group))
+ return getError(ret)
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/select_fs.go b/vendor/github.com/ceph/go-ceph/cephfs/select_fs.go
new file mode 100644
index 000000000..d776ee76f
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/select_fs.go
@@ -0,0 +1,31 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#define _GNU_SOURCE
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+)
+
+// SelectFilesystem selects a file system to be mounted. If the ceph cluster
+// supports more than one cephfs this optional function selects which one to
+// use. Can only be called prior to calling Mount. The name of the file system
+// is not validated by this call - if the supplied file system name is not
+// valid then only the subsequent mount call will fail.
+//
+// Implements:
+//
+// int ceph_select_filesystem(struct ceph_mount_info *cmount, const char *fs_name);
+func (mount *MountInfo) SelectFilesystem(name string) error {
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+
+ ret := C.ceph_select_filesystem(mount.mount, cName)
+ return getError(ret)
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/statfs.go b/vendor/github.com/ceph/go-ceph/cephfs/statfs.go
new file mode 100644
index 000000000..292cb2405
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/statfs.go
@@ -0,0 +1,75 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+#include
+*/
+import "C"
+
+import (
+ "unsafe"
+)
+
+// CephStatVFS instances are returned from the StatFS call. It reports
+// file-system wide statistics.
+type CephStatVFS struct {
+ // Bsize reports the file system's block size.
+ Bsize int64
+ // Fragment reports the file system's fragment size.
+ Frsize int64
+ // Blocks reports the number of blocks in the file system.
+ Blocks uint64
+ // Bfree reports the number of free blocks.
+ Bfree uint64
+ // Bavail reports the number of free blocks for unprivileged users.
+ Bavail uint64
+ // Files reports the number of inodes in the file system.
+ Files uint64
+ // Ffree reports the number of free indoes.
+ Ffree uint64
+ // Favail reports the number of free indoes for unprivileged users.
+ Favail uint64
+ // Fsid reports the file system ID number.
+ Fsid int64
+ // Flag reports the file system mount flags.
+ Flag int64
+ // Namemax reports the maximum file name length.
+ Namemax int64
+}
+
+// StatFS returns file system wide statistics.
+// NOTE: Many of the statistics fields reported by ceph are not filled in with
+// useful values.
+//
+// Implements:
+//
+// int ceph_statfs(struct ceph_mount_info *cmount, const char *path, struct statvfs *stbuf);
+func (mount *MountInfo) StatFS(path string) (*CephStatVFS, error) {
+ if err := mount.validate(); err != nil {
+ return nil, err
+ }
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ var statvfs C.struct_statvfs
+ ret := C.ceph_statfs(mount.mount, cPath, &statvfs)
+ if ret != 0 {
+ return nil, getError(ret)
+ }
+ csfs := &CephStatVFS{
+ Bsize: int64(statvfs.f_bsize),
+ Frsize: int64(statvfs.f_frsize),
+ Blocks: uint64(statvfs.f_blocks),
+ Bfree: uint64(statvfs.f_bfree),
+ Bavail: uint64(statvfs.f_bavail),
+ Files: uint64(statvfs.f_files),
+ Ffree: uint64(statvfs.f_ffree),
+ Favail: uint64(statvfs.f_favail),
+ Fsid: int64(statvfs.f_fsid),
+ Flag: int64(statvfs.f_flag),
+ Namemax: int64(statvfs.f_namemax),
+ }
+ return csfs, nil
+}
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/statx.go b/vendor/github.com/ceph/go-ceph/cephfs/statx.go
new file mode 100644
index 000000000..5cc1b8592
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/statx.go
@@ -0,0 +1,156 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+#ifndef AT_STATX_DONT_SYNC
+// for versions earlier than Pacific
+#define AT_STATX_DONT_SYNC AT_NO_ATTR_SYNC
+#endif
+*/
+import "C"
+
+import (
+ ts "github.com/ceph/go-ceph/internal/timespec"
+)
+
+// Timespec is a public type for the internal C 'struct timespec'
+type Timespec ts.Timespec
+
+// StatxMask values contain bit-flags indicating what data should be
+// populated by a statx-type call.
+type StatxMask uint32
+
+const (
+ // StatxMode requests the mode value be filled in.
+ StatxMode = StatxMask(C.CEPH_STATX_MODE)
+ // StatxNlink requests the nlink value be filled in.
+ StatxNlink = StatxMask(C.CEPH_STATX_NLINK)
+ // StatxUid requests the uid value be filled in.
+ StatxUid = StatxMask(C.CEPH_STATX_UID)
+ // StatxRdev requests the rdev value be filled in.
+ StatxRdev = StatxMask(C.CEPH_STATX_RDEV)
+ // StatxAtime requests the access-time value be filled in.
+ StatxAtime = StatxMask(C.CEPH_STATX_ATIME)
+ // StatxMtime requests the modified-time value be filled in.
+ StatxMtime = StatxMask(C.CEPH_STATX_MTIME)
+ // StatxIno requests the inode be filled in.
+ StatxIno = StatxMask(C.CEPH_STATX_INO)
+ // StatxSize requests the size value be filled in.
+ StatxSize = StatxMask(C.CEPH_STATX_SIZE)
+ // StatxBlocks requests the blocks value be filled in.
+ StatxBlocks = StatxMask(C.CEPH_STATX_BLOCKS)
+ // StatxBasicStats requests all the fields that are part of a
+ // traditional stat call.
+ StatxBasicStats = StatxMask(C.CEPH_STATX_BASIC_STATS)
+ // StatxBtime requests the birth-time value be filled in.
+ StatxBtime = StatxMask(C.CEPH_STATX_BTIME)
+ // StatxVersion requests the version value be filled in.
+ StatxVersion = StatxMask(C.CEPH_STATX_VERSION)
+ // StatxAllStats requests all known stat values be filled in.
+ StatxAllStats = StatxMask(C.CEPH_STATX_ALL_STATS)
+)
+
+// AtFlags represent flags to be passed to calls that control how files
+// are used or referenced. For example, not following symlinks.
+type AtFlags uint
+
+const (
+ // AtStatxDontSync requests that the stat call only fetch locally-cached
+ // values if possible, avoiding round trips to a back-end server.
+ AtStatxDontSync = AtFlags(C.AT_STATX_DONT_SYNC)
+ // AtNoAttrSync requests that the stat call only fetch locally-cached
+ // values if possible, avoiding round trips to a back-end server.
+ //
+ // Deprecated: replaced by AtStatxDontSync
+ AtNoAttrSync = AtStatxDontSync
+ // AtSymlinkNofollow indicates the call should not follow symlinks
+ // but operate on the symlink itself.
+ AtSymlinkNofollow = AtFlags(C.AT_SYMLINK_NOFOLLOW)
+)
+
+// NOTE: CephStatx fields are meant to be settable by the callers.
+// This is the primary reason we use public fields and not accessors
+// for the CephStatx type.
+
+// CephStatx instances are returned by extended stat (statx) calls.
+// Note that CephStatx results are similar to but not identical
+// to (Linux) system statx results.
+type CephStatx struct {
+ // Mask is a bitmask indicating what fields have been set.
+ Mask StatxMask
+ // Blksize represents the file system's block size.
+ Blksize uint32
+ // Nlink is the number of links for the file.
+ Nlink uint32
+ // Uid (user id) value for the file.
+ Uid uint32
+ // Gid (group id) value for the file.
+ Gid uint32
+ // Mode is the file's type and mode value.
+ Mode uint16
+ // Inode value for the file.
+ Inode Inode
+ // Size of the file in bytes.
+ Size uint64
+ // Blocks indicates the number of blocks allocated to the file.
+ Blocks uint64
+ // Dev describes the device containing this file system.
+ Dev uint64
+ // Rdev describes the device of this file, if the file is a device.
+ Rdev uint64
+ // Atime is the access time of this file.
+ Atime Timespec
+ // Ctime is the status change time of this file.
+ Ctime Timespec
+ // Mtime is the modification time of this file.
+ Mtime Timespec
+ // Btime is the creation (birth) time of this file.
+ Btime Timespec
+ // Version value for the file.
+ Version uint64
+}
+
+func cStructToCephStatx(s C.struct_ceph_statx) *CephStatx {
+ return &CephStatx{
+ Mask: StatxMask(s.stx_mask),
+ Blksize: uint32(s.stx_blksize),
+ Nlink: uint32(s.stx_nlink),
+ Uid: uint32(s.stx_uid),
+ Gid: uint32(s.stx_gid),
+ Mode: uint16(s.stx_mode),
+ Inode: Inode(s.stx_ino),
+ Size: uint64(s.stx_size),
+ Blocks: uint64(s.stx_blocks),
+ Dev: uint64(s.stx_dev),
+ Rdev: uint64(s.stx_rdev),
+ Atime: Timespec(ts.CStructToTimespec(ts.CTimespecPtr(&s.stx_atime))),
+ Ctime: Timespec(ts.CStructToTimespec(ts.CTimespecPtr(&s.stx_ctime))),
+ Mtime: Timespec(ts.CStructToTimespec(ts.CTimespecPtr(&s.stx_mtime))),
+ Btime: Timespec(ts.CStructToTimespec(ts.CTimespecPtr(&s.stx_btime))),
+ Version: uint64(s.stx_version),
+ }
+}
+
+/* TODO:
+ - enable later when we can test round -trips
+ - add time fields
+
+func (c *CephStatx) toCStruct() C.struct_ceph_statx {
+ var s C.struct_ceph_statx
+ s.stx_mask = C.uint32_t(c.Mask)
+ s.stx_blksize = C.uint32_t(c.Blksize)
+ s.stx_nlink = C.uint32_t(c.Nlink)
+ s.stx_uid = C.uint32_t(c.Uid)
+ s.stx_gid = C.uint32_t(c.Gid)
+ s.stx_mode = C.uint16_t(c.Mode)
+ s.stx_ino = C.uint64_t(c.Inode)
+ s.stx_size = C.uint64_t(c.Size)
+ s.stx_blocks = C.uint64_t(c.Blocks)
+ s.stx_dev = C.uint64_t(c.Dev)
+ s.stx_rdev = C.uint64_t(c.Rdev)
+ s.stx_version = C.uint64_t(c.Version)
+ return s
+}
+*/
diff --git a/vendor/github.com/ceph/go-ceph/cephfs/userperm.go b/vendor/github.com/ceph/go-ceph/cephfs/userperm.go
new file mode 100644
index 000000000..df214dea8
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/cephfs/userperm.go
@@ -0,0 +1,79 @@
+package cephfs
+
+/*
+#cgo LDFLAGS: -lcephfs
+#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64
+#include
+*/
+import "C"
+
+import (
+ "runtime"
+ "unsafe"
+
+ "github.com/ceph/go-ceph/internal/log"
+)
+
+// UserPerm types may be used to get or change the credentials used by the
+// connection or some operations.
+type UserPerm struct {
+ userPerm *C.UserPerm
+
+ // cache create-time params
+ managed bool // if set, the userPerm was created by go-ceph
+ uid C.uid_t
+ gid C.gid_t
+ gidList []C.gid_t
+}
+
+// NewUserPerm creates a UserPerm pointer and the underlying ceph resources.
+//
+// Implements:
+//
+// UserPerm *ceph_userperm_new(uid_t uid, gid_t gid, int ngids, gid_t *gidlist);
+func NewUserPerm(uid, gid int, gidlist []int) *UserPerm {
+ // the C code does not copy the content of the gid list so we keep the
+ // inputs stashed in the go type. For completeness we stash everything.
+ p := &UserPerm{
+ managed: true,
+ uid: C.uid_t(uid),
+ gid: C.gid_t(gid),
+ gidList: make([]C.gid_t, len(gidlist)),
+ }
+ var cgids *C.gid_t
+ if len(p.gidList) > 0 {
+ for i, gid := range gidlist {
+ p.gidList[i] = C.gid_t(gid)
+ }
+ cgids = (*C.gid_t)(unsafe.Pointer(&p.gidList[0]))
+ }
+ p.userPerm = C.ceph_userperm_new(
+ p.uid, p.gid, C.int(len(p.gidList)), cgids)
+ // if the go object is unreachable, we would like to free the c-memory
+ // since this has no other resources than memory associated with it.
+ // This is only valid for UserPerm objects created by new, and thus have
+ // the managed var set.
+ runtime.SetFinalizer(p, destroyUserPerm)
+ return p
+}
+
+// Destroy will explicitly free ceph resources associated with the UserPerm.
+//
+// Implements:
+//
+// void ceph_userperm_destroy(UserPerm *perm);
+func (p *UserPerm) Destroy() {
+ if p.userPerm == nil || !p.managed {
+ return
+ }
+ C.ceph_userperm_destroy(p.userPerm)
+ p.userPerm = nil
+ p.gidList = nil
+}
+
+func destroyUserPerm(p *UserPerm) {
+ if p.userPerm != nil && p.managed {
+ log.Warnf("unreachable UserPerm object has not been destroyed. Cleaning up.")
+ }
+ p.Destroy()
+}
diff --git a/vendor/github.com/ceph/go-ceph/internal/cutil/ptrguard.go b/vendor/github.com/ceph/go-ceph/internal/cutil/ptrguard.go
index 27a4c5eb1..d49ed3bfb 100644
--- a/vendor/github.com/ceph/go-ceph/internal/cutil/ptrguard.go
+++ b/vendor/github.com/ceph/go-ceph/internal/cutil/ptrguard.go
@@ -1,90 +1,37 @@
+//go:build !go1.21
+// +build !go1.21
+
+// This code assumes a non-moving garbage collector, which is the case until at
+// least go 1.20
+
package cutil
import (
- "sync"
"unsafe"
)
// PtrGuard respresents a guarded Go pointer (pointing to memory allocated by Go
// runtime) stored in C memory (allocated by C)
type PtrGuard struct {
- // These mutexes will be used as binary semaphores for signalling events from
- // one thread to another, which - in contrast to other languages like C++ - is
- // possible in Go, that is a Mutex can be locked in one thread and unlocked in
- // another.
- stored, release sync.Mutex
- released bool
+ cPtr CPtr
+ goPtr unsafe.Pointer
}
-// WARNING: using binary semaphores (mutexes) for signalling like this is quite
-// a delicate task in order to avoid deadlocks or panics. Whenever changing the
-// code logic, please review at least three times that there is no unexpected
-// state possible. Usually the natural choice would be to use channels instead,
-// but these can not easily passed to C code because of the pointer-to-pointer
-// cgo rule, and would require the use of a Go object registry.
-
// NewPtrGuard writes the goPtr (pointing to Go memory) into C memory at the
// position cPtr, and returns a PtrGuard object.
func NewPtrGuard(cPtr CPtr, goPtr unsafe.Pointer) *PtrGuard {
var v PtrGuard
- // Since the mutexes are used for signalling, they have to be initialized to
- // locked state, so that following lock attempts will block.
- v.release.Lock()
- v.stored.Lock()
- // Start a background go routine that lives until Release is called. This
- // calls a special function that makes sure the garbage collector doesn't touch
- // goPtr, stores it into C memory at position cPtr and then waits until it
- // reveices the "release" signal, after which it nulls out the C memory at
- // cPtr and then exits.
- go func() {
- storeUntilRelease(&v, (*CPtr)(cPtr), uintptr(goPtr))
- }()
- // Wait for the "stored" signal from the go routine when the Go pointer has
- // been stored to the C memory. <--(1)
- v.stored.Lock()
+ v.cPtr = cPtr
+ v.goPtr = goPtr
+ p := (*unsafe.Pointer)(unsafe.Pointer(cPtr))
+ *p = goPtr
return &v
}
// Release removes the guarded Go pointer from the C memory by overwriting it
// with NULL.
func (v *PtrGuard) Release() {
- if !v.released {
- v.released = true
- v.release.Unlock() // Send the "release" signal to the go routine. -->(2)
- v.stored.Lock() // Wait for the second "stored" signal when the C memory
- // has been nulled out. <--(3)
-
- }
-}
-
-// The uintptrPtr() helper function below assumes that uintptr has the same size
-// as a pointer, although in theory it could be larger. Therefore we use this
-// constant expression to assert size equality as a safeguard at compile time.
-// How it works: if sizes are different, either the inner or outer expression is
-// negative, which always fails with "constant ... overflows uintptr", because
-// unsafe.Sizeof() is a uintptr typed constant.
-const _ = -(unsafe.Sizeof(uintptr(0)) - PtrSize) // size assert
-func uintptrPtr(p *CPtr) *uintptr {
- return (*uintptr)(unsafe.Pointer(p))
-}
-
-//go:uintptrescapes
-
-// From https://golang.org/src/cmd/compile/internal/gc/lex.go:
-// For the next function declared in the file any uintptr arguments may be
-// pointer values converted to uintptr. This directive ensures that the
-// referenced allocated object, if any, is retained and not moved until the call
-// completes, even though from the types alone it would appear that the object
-// is no longer needed during the call. The conversion to uintptr must appear in
-// the argument list.
-// Also see https://golang.org/cmd/compile/#hdr-Compiler_Directives
-
-func storeUntilRelease(v *PtrGuard, cPtr *CPtr, goPtr uintptr) {
- uip := uintptrPtr(cPtr)
- *uip = goPtr // store Go pointer in C memory at c_ptr
- v.stored.Unlock() // send "stored" signal to main thread -->(1)
- v.release.Lock() // wait for "release" signal from main thread when
- // Release() has been called. <--(2)
- *uip = 0 // reset C memory to NULL
- v.stored.Unlock() // send second "stored" signal to main thread -->(3)
+ p := (*unsafe.Pointer)(unsafe.Pointer(v.cPtr))
+ *p = nil
+ v.goPtr = nil
}
diff --git a/vendor/github.com/ceph/go-ceph/internal/cutil/ptrguard_pinner.go b/vendor/github.com/ceph/go-ceph/internal/cutil/ptrguard_pinner.go
new file mode 100644
index 000000000..3862b5c81
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/internal/cutil/ptrguard_pinner.go
@@ -0,0 +1,35 @@
+//go:build go1.21
+// +build go1.21
+
+package cutil
+
+import (
+ "runtime"
+ "unsafe"
+)
+
+// PtrGuard respresents a guarded Go pointer (pointing to memory allocated by Go
+// runtime) stored in C memory (allocated by C)
+type PtrGuard struct {
+ cPtr CPtr
+ pinner runtime.Pinner
+}
+
+// NewPtrGuard writes the goPtr (pointing to Go memory) into C memory at the
+// position cPtr, and returns a PtrGuard object.
+func NewPtrGuard(cPtr CPtr, goPtr unsafe.Pointer) *PtrGuard {
+ var v PtrGuard
+ v.pinner.Pin(goPtr)
+ v.cPtr = cPtr
+ p := (*unsafe.Pointer)(unsafe.Pointer(cPtr))
+ *p = goPtr
+ return &v
+}
+
+// Release removes the guarded Go pointer from the C memory by overwriting it
+// with NULL.
+func (v *PtrGuard) Release() {
+ p := (*unsafe.Pointer)(unsafe.Pointer(v.cPtr))
+ *p = nil
+ v.pinner.Unpin()
+}
diff --git a/vendor/github.com/ceph/go-ceph/rbd/resize.go b/vendor/github.com/ceph/go-ceph/rbd/resize.go
new file mode 100644
index 000000000..288639d49
--- /dev/null
+++ b/vendor/github.com/ceph/go-ceph/rbd/resize.go
@@ -0,0 +1,77 @@
+//go:build ceph_preview
+
+package rbd
+
+/*
+#cgo LDFLAGS: -lrbd
+#define _POSIX_C_SOURCE 200112L
+#undef _GNU_SOURCE
+#include
+#include
+#include
+#include
+
+extern int resize2Callback(uint64_t, uint64_t, uintptr_t);
+
+// inline wrapper to cast uintptr_t to void*
+static inline int wrap_rbd_resize2(
+ rbd_image_t image, uint64_t size, bool allow_shrink, uintptr_t arg) {
+ return rbd_resize2(
+ image, size, allow_shrink, (librbd_progress_fn_t)resize2Callback, (void*)arg);
+};
+*/
+import "C"
+
+import (
+ "github.com/ceph/go-ceph/internal/callbacks"
+)
+
+// Resize2ProgressCallback is the callback function type for Image.Resize2.
+type Resize2ProgressCallback func(progress uint64, total uint64, data interface{}) int
+
+var resizeCallbacks = callbacks.New()
+
+type resizeProgressCallbackCtx struct {
+ callback Resize2ProgressCallback
+ data interface{}
+}
+
+//export resize2Callback
+func resize2Callback(
+ offset, total C.uint64_t, index uintptr,
+) C.int {
+ v := resizeCallbacks.Lookup(index)
+ ctx := v.(resizeProgressCallbackCtx)
+ return C.int(ctx.callback(uint64(offset), uint64(total), ctx.data))
+}
+
+// Resize2 resizes an rbd image and allows configuration of allow_shrink and a callback function. The callback
+// function will be called with the first argument as the progress, the second argument as the total, and the third
+// argument as an opaque value that is passed to the Resize2 function's data argument in each callback execution.
+// The resize operation will be aborted if the progress callback returns a non-zero value.
+//
+// Implements:
+//
+// int rbd_resize(rbd_image_t image, uint64_t size, allow_shrink bool, librbd_progress_fn_t cb, void *cbdata);
+func (image *Image) Resize2(size uint64, allowShrink bool, cb Resize2ProgressCallback, data interface{}) error {
+ // the provided callback must be a real function
+ if cb == nil {
+ return rbdError(C.EINVAL)
+ }
+
+ if err := image.validate(imageIsOpen); err != nil {
+ return err
+ }
+
+ ctx := resizeProgressCallbackCtx{
+ callback: cb,
+ data: data,
+ }
+ cbIndex := resizeCallbacks.Add(ctx)
+ defer resizeCallbacks.Remove(cbIndex)
+
+ ret := C.wrap_rbd_resize2(image.image, C.uint64_t(size), C.bool(allowShrink), C.uintptr_t(cbIndex))
+
+ return getError(ret)
+
+}
diff --git a/vendor/github.com/go-jose/go-jose/v3/CHANGELOG.md b/vendor/github.com/go-jose/go-jose/v3/CHANGELOG.md
new file mode 100644
index 000000000..7820c2f4d
--- /dev/null
+++ b/vendor/github.com/go-jose/go-jose/v3/CHANGELOG.md
@@ -0,0 +1,8 @@
+# v3.0.1
+
+Fixed:
+ - Security issue: an attacker specifying a large "p2c" value can cause
+ JSONWebEncryption.Decrypt and JSONWebEncryption.DecryptMulti to consume large
+ amounts of CPU, causing a DoS. Thanks to Matt Schwager (@mschwager) for the
+ disclosure and to Tom Tervoort for originally publishing the category of attack.
+ https://i.blackhat.com/BH-US-23/Presentations/US-23-Tervoort-Three-New-Attacks-Against-JSON-Web-Tokens.pdf
diff --git a/vendor/github.com/go-jose/go-jose/v3/symmetric.go b/vendor/github.com/go-jose/go-jose/v3/symmetric.go
index fb54775ed..1ffd2708b 100644
--- a/vendor/github.com/go-jose/go-jose/v3/symmetric.go
+++ b/vendor/github.com/go-jose/go-jose/v3/symmetric.go
@@ -415,6 +415,11 @@ func (ctx *symmetricKeyCipher) decryptKey(headers rawHeader, recipient *recipien
if p2c <= 0 {
return nil, fmt.Errorf("go-jose/go-jose: invalid P2C: must be a positive integer")
}
+ if p2c > 1000000 {
+ // An unauthenticated attacker can set a high P2C value. Set an upper limit to avoid
+ // DoS attacks.
+ return nil, fmt.Errorf("go-jose/go-jose: invalid P2C: too high")
+ }
// salt is UTF8(Alg) || 0x00 || Salt Input
alg := headers.getAlgorithm()
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 781f898d8..92a16a1b2 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -138,8 +138,9 @@ github.com/ceph/ceph-csi/api/deploy/kubernetes/cephfs
github.com/ceph/ceph-csi/api/deploy/kubernetes/nfs
github.com/ceph/ceph-csi/api/deploy/kubernetes/rbd
github.com/ceph/ceph-csi/api/deploy/ocp
-# github.com/ceph/go-ceph v0.24.0
+# github.com/ceph/go-ceph v0.24.1-0.20231116190858-df112a417d31
## explicit; go 1.19
+github.com/ceph/go-ceph/cephfs
github.com/ceph/go-ceph/cephfs/admin
github.com/ceph/go-ceph/common/admin/manager
github.com/ceph/go-ceph/common/admin/nfs
@@ -208,7 +209,7 @@ github.com/gemalto/kmip-go/ttlv
# github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32
## explicit
github.com/ghodss/yaml
-# github.com/go-jose/go-jose/v3 v3.0.0
+# github.com/go-jose/go-jose/v3 v3.0.1
## explicit; go 1.12
github.com/go-jose/go-jose/v3
github.com/go-jose/go-jose/v3/cipher