diff --git a/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml b/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml index eeadbd533..a3b435a00 100644 --- a/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml +++ b/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml @@ -75,6 +75,9 @@ spec: - "--drivername=$(DRIVER_NAME)" {{- if .Values.topology.enabled }} - "--domainlabels={{ .Values.topology.domainLabels | join "," }}" +{{- end }} +{{- if .Values.nodeplugin.profiling.enabled }} + - "--enableprofiling={{ .Values.nodeplugin.profiling.enabled }}" {{- end }} env: - name: POD_IP diff --git a/charts/ceph-csi-cephfs/templates/provisioner-deployment.yaml b/charts/ceph-csi-cephfs/templates/provisioner-deployment.yaml index 8d6168a29..b747e53ef 100644 --- a/charts/ceph-csi-cephfs/templates/provisioner-deployment.yaml +++ b/charts/ceph-csi-cephfs/templates/provisioner-deployment.yaml @@ -136,6 +136,9 @@ spec: - "--endpoint=$(CSI_ENDPOINT)" - "--v={{ .Values.logLevel }}" - "--drivername=$(DRIVER_NAME)" +{{- if .Values.provisioner.profiling.enabled }} + - "--enableprofiling={{ .Values.provisioner.profiling.enabled }}" +{{- end }} env: - name: POD_IP valueFrom: diff --git a/charts/ceph-csi-cephfs/values.yaml b/charts/ceph-csi-cephfs/values.yaml index 19049139a..59cd6a470 100644 --- a/charts/ceph-csi-cephfs/values.yaml +++ b/charts/ceph-csi-cephfs/values.yaml @@ -74,6 +74,9 @@ nodeplugin: loadBalancerIP: "" loadBalancerSourceRanges: [] + profiling: + enabled: false + registrar: image: repository: k8s.gcr.io/sig-storage/csi-node-driver-registrar @@ -144,6 +147,9 @@ provisioner: loadBalancerIP: "" loadBalancerSourceRanges: [] + profiling: + enabled: false + provisioner: image: repository: k8s.gcr.io/sig-storage/csi-provisioner diff --git a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml index 049b11806..445ec0dfe 100644 --- a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml +++ 
b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml @@ -73,6 +73,9 @@ spec: - "--drivername=$(DRIVER_NAME)" {{- if .Values.topology.enabled }} - "--domainlabels={{ .Values.topology.domainLabels | join "," }}" +{{- end }} +{{- if .Values.nodeplugin.profiling.enabled }} + - "--enableprofiling={{ .Values.nodeplugin.profiling.enabled }}" {{- end }} env: - name: POD_IP diff --git a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml index 33527fb9c..cf72883f4 100644 --- a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml +++ b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml @@ -142,6 +142,9 @@ spec: {{- if .Values.provisioner.skipForceFlatten }} - "--skipforceflatten={{ .Values.provisioner.skipForceFlatten }}" {{- end }} + {{- if .Values.provisioner.profiling.enabled }} + - "--enableprofiling={{ .Values.provisioner.profiling.enabled }}" + {{- end }} env: - name: POD_IP valueFrom: diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index d00b407c0..a5862e78b 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -85,6 +85,10 @@ nodeplugin: loadBalancerIP: "" loadBalancerSourceRanges: [] + profiling: + # enable profiling to check for memory leaks + enabled: false + registrar: image: repository: k8s.gcr.io/sig-storage/csi-node-driver-registrar @@ -169,6 +173,10 @@ provisioner: loadBalancerIP: "" loadBalancerSourceRanges: [] + profiling: + # enable profiling to check for memory leaks + enabled: false + provisioner: image: repository: k8s.gcr.io/sig-storage/csi-provisioner diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go index 78342e714..803b28461 100644 --- a/cmd/cephcsi.go +++ b/cmd/cephcsi.go @@ -90,6 +90,7 @@ func init() { "skip image flattening if kernel support mapping of rbd images which has the deep-flatten feature") flag.BoolVar(&conf.Version, "version", false, "Print cephcsi version information") + 
flag.BoolVar(&conf.EnableProfiling, "enableprofiling", false, "enable go profiling") klog.InitFlags(nil) if err := flag.Set("logtostderr", "true"); err != nil { diff --git a/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml b/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml index 910c7b6f1..423fc6008 100644 --- a/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml +++ b/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml @@ -120,6 +120,7 @@ spec: - "--v=5" - "--drivername=cephfs.csi.ceph.com" - "--pidlimit=-1" + - "--enableprofiling=false" env: - name: POD_IP valueFrom: diff --git a/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml b/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml index da49b30d4..f061af8a4 100644 --- a/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml +++ b/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml @@ -55,6 +55,7 @@ spec: - "--endpoint=$(CSI_ENDPOINT)" - "--v=5" - "--drivername=cephfs.csi.ceph.com" + - "--enableprofiling=false" # If topology based provisioning is desired, configure required # node labels representing the nodes topology domain # and pass the label names below, for CSI to consume and advertise diff --git a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml index 0d20cc367..dfe019aab 100644 --- a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml +++ b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml @@ -125,6 +125,7 @@ spec: - "--pidlimit=-1" - "--rbdhardmaxclonedepth=8" - "--rbdsoftmaxclonedepth=4" + - "--enableprofiling=false" env: - name: POD_IP valueFrom: diff --git a/deploy/rbd/kubernetes/csi-rbdplugin.yaml b/deploy/rbd/kubernetes/csi-rbdplugin.yaml index d15894aee..0d524b8e3 100644 --- a/deploy/rbd/kubernetes/csi-rbdplugin.yaml +++ b/deploy/rbd/kubernetes/csi-rbdplugin.yaml @@ -56,6 +56,7 @@ spec: - "--endpoint=$(CSI_ENDPOINT)" - "--v=5" - "--drivername=rbd.csi.ceph.com" + - "--enableprofiling=false" # If topology based provisioning 
is desired, configure required # node labels representing the nodes topology domain # and pass the label names below, for CSI to consume and advertise diff --git a/internal/cephfs/driver.go b/internal/cephfs/driver.go index 66921b656..174f68cda 100644 --- a/internal/cephfs/driver.go +++ b/internal/cephfs/driver.go @@ -164,5 +164,12 @@ func (fs *Driver) Run(conf *util.Config) { util.WarningLogMsg("EnableGRPCMetrics is deprecated") go util.StartMetricsServer(conf) } + if conf.EnableProfiling { + if !conf.EnableGRPCMetrics { + go util.StartMetricsServer(conf) + } + util.DebugLogMsg("Registering profiling handler") + go util.EnableProfiling() + } server.Wait() } diff --git a/internal/rbd/driver.go b/internal/rbd/driver.go index 4f43ce99d..cc8c369d0 100644 --- a/internal/rbd/driver.go +++ b/internal/rbd/driver.go @@ -187,5 +187,12 @@ func (r *Driver) Run(conf *util.Config) { util.WarningLogMsg("EnableGRPCMetrics is deprecated") go util.StartMetricsServer(conf) } + if conf.EnableProfiling { + if !conf.EnableGRPCMetrics { + go util.StartMetricsServer(conf) + } + util.DebugLogMsg("Registering profiling handler") + go util.EnableProfiling() + } s.Wait() } diff --git a/internal/util/httpserver.go b/internal/util/httpserver.go index cd2619635..d06630a7c 100644 --- a/internal/util/httpserver.go +++ b/internal/util/httpserver.go @@ -3,7 +3,9 @@ package util import ( "net" "net/http" + "net/http/pprof" "net/url" + runtime_pprof "runtime/pprof" "strconv" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -24,3 +26,22 @@ func StartMetricsServer(c *Config) { FatalLogMsg("failed to listen on address %v: %s", addr, err) } } +func addPath(name string, handler http.Handler) { + http.Handle(name, handler) + DebugLogMsg("DEBUG: registered profiling handler on /debug/pprof/%s\n", name) +} + +// EnableProfiling enables golang profiling. It calls addPath for every profile returned by runtime_pprof.Profiles(), using pprof.Handler(name) as the handler, and then for the static cmdline, profile, symbol and trace handlers. NOTE(review): addPath passes the bare name to http.Handle although its log message reports /debug/pprof/<name> - confirm the registered pattern is the intended one.
+func EnableProfiling() { + for _, profile := range runtime_pprof.Profiles() { + name := profile.Name() + handler := pprof.Handler(name) + addPath(name, handler) + } + + // static profiles as listed in net/http/pprof/pprof.go:init() + addPath("cmdline", http.HandlerFunc(pprof.Cmdline)) + addPath("profile", http.HandlerFunc(pprof.Profile)) + addPath("symbol", http.HandlerFunc(pprof.Symbol)) + addPath("trace", http.HandlerFunc(pprof.Trace)) +} diff --git a/internal/util/util.go b/internal/util/util.go index df2c8eaba..955923db6 100644 --- a/internal/util/util.go +++ b/internal/util/util.go @@ -88,6 +88,7 @@ type Config struct { PoolTimeout time.Duration // probe timeout in seconds EnableGRPCMetrics bool // option to enable grpc metrics + EnableProfiling bool // flag to enable profiling IsControllerServer bool // if set to true start provisoner server IsNodeServer bool // if set to true start node server Version bool // cephcsi version