vendor updates

Serguei Bezverkhi
2018-03-06 17:33:18 -05:00
parent 4b3ebc171b
commit e9033989a0
5854 changed files with 248382 additions and 119809 deletions

View File

@ -8,4 +8,71 @@ they can be searched, viewed, and analyzed.
Learn more at: https://kubernetes.io/docs/tasks/debug-application-cluster/logging-stackdriver
## Troubleshooting
In Kubernetes clusters running version 1.10.0 or later, the fluentd-gcp DaemonSet
can be manually scaled. This is useful, for example, when applications running in
the cluster are sending a large volume of logs (e.g. over 100kB/s), causing
fluentd-gcp to fail with OutOfMemory errors. Conversely, if the applications
aren't generating many logs, it may be useful to reduce the amount of resources
consumed by fluentd-gcp, making those resources available to other applications.
To learn more about Kubernetes resource requests and limits, see the official
documentation ([CPU][cpu], [memory][memory]). The amount of resources requested
by fluentd-gcp on every node in the cluster can be fetched by running the
following command:
```
$ kubectl get ds -n kube-system -l k8s-app=fluentd-gcp \
-o custom-columns=NAME:.metadata.name,\
CPU_REQUEST:.spec.template.spec.containers[].resources.requests.cpu,\
MEMORY_REQUEST:.spec.template.spec.containers[].resources.requests.memory,\
MEMORY_LIMIT:.spec.template.spec.containers[].resources.limits.memory
```
This will display output similar to the following:
```
NAME CPU_REQUEST MEMORY_REQUEST MEMORY_LIMIT
fluentd-gcp-v2.0.15 100m 200Mi 300Mi
```
In order to change those values, a [ScalingPolicy][scalingPolicy] needs to be
defined. Currently, only base values are supported (there is no automatic
scaling). The ScalingPolicy can be created using kubectl, e.g. to set the CPU
request to 101m, the memory request to 150Mi, and the memory limit to 400Mi:
```
$ cat <<EOF | kubectl apply -f -
apiVersion: scalingpolicy.kope.io/v1alpha1
kind: ScalingPolicy
metadata:
  name: fluentd-gcp-scaling-policy
  namespace: kube-system
spec:
  containers:
  - name: fluentd-gcp
    resources:
      requests:
      - resource: cpu
        base: 101m
      - resource: memory
        base: 150Mi
      limits:
      - resource: memory
        base: 400Mi
EOF
```
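Once the policy is applied, a quick sanity check is to list it using the same
fully-qualified resource name that the removal command below uses (exact output
columns depend on your kubectl version):
```
$ kubectl get scalingpolicies.scalingpolicy.kope.io -n kube-system
```
The scaling itself is performed by the fluentd-gcp-scaler deployment, which
reads the ScalingPolicy and patches the fluentd-gcp DaemonSet accordingly (its
RBAC Role grants get on scalingpolicies and get/patch on daemonsets).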
To remove the override and go back to the GKE-provided defaults, simply delete
the ScalingPolicy:
```
$ kubectl delete -n kube-system scalingpolicies.scalingpolicy.kope.io/fluentd-gcp-scaling-policy
```
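After the policy is deleted, the scaler reverts the DaemonSet to the defaults
configured on the fluentd-gcp-scaler deployment (100m CPU request, 200Mi memory
request, 300Mi memory limit in this release). To double-check, re-run the
resource query from the beginning of this section:
```
$ kubectl get ds -n kube-system -l k8s-app=fluentd-gcp \
-o custom-columns=NAME:.metadata.name,\
CPU_REQUEST:.spec.template.spec.containers[].resources.requests.cpu,\
MEMORY_REQUEST:.spec.template.spec.containers[].resources.requests.memory,\
MEMORY_LIMIT:.spec.template.spec.containers[].resources.limits.memory
```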
[cpu]: https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/
[memory]: https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/
[scalingPolicy]: https://github.com/justinsb/scaler
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/cluster/addons/fluentd-gcp/README.md?pixel)]()

View File

@ -29,11 +29,11 @@ subjects:
apiVersion: apps/v1beta1
kind: Deployment
metadata:
name: event-exporter-v0.1.7
name: event-exporter-v0.1.8
namespace: kube-system
labels:
k8s-app: event-exporter
version: v0.1.7
version: v0.1.8
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
spec:
@ -42,17 +42,18 @@ spec:
metadata:
labels:
k8s-app: event-exporter
version: v0.1.7
version: v0.1.8
spec:
serviceAccountName: event-exporter-sa
containers:
- name: event-exporter
image: gcr.io/google-containers/event-exporter:v0.1.7
image: k8s.gcr.io/event-exporter:v0.1.8
command:
- '/event-exporter'
- /event-exporter
- -sink-opts="-location={{ event_exporter_zone }}"
# BEGIN_PROMETHEUS_TO_SD
- name: prometheus-to-sd-exporter
image: gcr.io/google-containers/prometheus-to-sd:v0.2.2
image: k8s.gcr.io/prometheus-to-sd:v0.2.4
command:
- /monitor
- --stackdriver-prefix={{ prometheus_to_sd_prefix }}/addons

View File

@ -46,33 +46,42 @@ data:
# CRI Log Example:
# 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here
<source>
type tail
@type tail
path /var/log/containers/*.log
pos_file /var/log/gcp-containers.log.pos
tag reform.*
read_from_head true
format multi_format
<pattern>
format json
time_key time
time_format %Y-%m-%dT%H:%M:%S.%NZ
</pattern>
<pattern>
format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
time_format %Y-%m-%dT%H:%M:%S.%N%:z
</pattern>
format none
</source>
<filter reform.**>
type parser
@type parser
key_name message
<parse>
@type multi_format
<pattern>
format json
time_key time
time_format %Y-%m-%dT%H:%M:%S.%NZ
</pattern>
<pattern>
format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
time_format %Y-%m-%dT%H:%M:%S.%N%:z
</pattern>
</parse>
</filter>
<filter reform.**>
@type parser
format /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<log>.*)/
reserve_data true
suppress_parse_error_log true
emit_invalid_record_to_error false
key_name log
</filter>
<match reform.**>
type record_reformer
@type record_reformer
enable_ruby true
tag raw.kubernetes.${tag_suffix[4].split('-')[0..-2].join('-')}
</match>
@ -89,21 +98,10 @@ data:
max_lines 1000
</match>
system.input.conf: |-
# Example:
# 2015-12-21 23:17:22,066 [salt.state ][INFO ] Completed state [net.ipv4.ip_forward] at time 23:17:22.066081
<source>
type tail
format /^(?<time>[^ ]* [^ ,]*)[^\[]*\[[^\]]*\]\[(?<severity>[^ \]]*) *\] (?<message>.*)$/
time_format %Y-%m-%d %H:%M:%S
path /var/log/salt/minion
pos_file /var/log/gcp-salt.pos
tag salt
</source>
# Example:
# Dec 21 23:17:22 gke-foo-1-1-4b5cbd14-node-4eoj startupscript: Finished running startup script /var/run/google.startup.script
<source>
type tail
@type tail
format syslog
path /var/log/startupscript.log
pos_file /var/log/gcp-startupscript.log.pos
@ -113,8 +111,9 @@ data:
# Examples:
# time="2016-02-04T06:51:03.053580605Z" level=info msg="GET /containers/json"
# time="2016-02-04T07:53:57.505612354Z" level=error msg="HTTP Error" err="No such image: -f" statusCode=404
# TODO(random-liu): Remove this after cri container runtime rolls out.
<source>
type tail
@type tail
format /^time="(?<time>[^)]*)" level=(?<severity>[^ ]*) msg="(?<message>[^"]*)"( err="(?<error>[^"]*)")?( statusCode=($<status_code>\d+))?/
path /var/log/docker.log
pos_file /var/log/gcp-docker.log.pos
@ -124,7 +123,7 @@ data:
# Example:
# 2016/02/04 06:52:38 filePurge: successfully removed file /var/etcd/data/member/wal/00000000000006d0-00000000010a23d1.wal
<source>
type tail
@type tail
# Not parsing this, because it doesn't have anything particularly useful to
# parse out of it (like severities).
format none
@ -140,7 +139,7 @@ data:
# Example:
# I0204 07:32:30.020537 3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
<source>
type tail
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
@ -154,7 +153,7 @@ data:
# Example:
# I1118 21:26:53.975789 6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
<source>
type tail
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
@ -168,7 +167,7 @@ data:
# Example:
# I0204 07:00:19.604280 5 handlers.go:131] GET /api/v1/nodes: (1.624207ms) 200 [[kube-controller-manager/v1.1.3 (linux/amd64) kubernetes/6a81b50] 127.0.0.1:38266]
<source>
type tail
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
@ -182,7 +181,7 @@ data:
# Example:
# I0204 06:55:31.872680 5 servicecontroller.go:277] LB already exists and doesn't need update for service kube-system/kube-ui
<source>
type tail
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
@ -196,7 +195,7 @@ data:
# Example:
# W0204 06:49:18.239674 7 reflector.go:245] pkg/scheduler/factory/factory.go:193: watch of *api.Service ended with: 401: The event in requested index is outdated and cleared (the requested history has been cleared [2578313/2577886]) [2579312]
<source>
type tail
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
@ -210,7 +209,7 @@ data:
# Example:
# I1104 10:36:20.242766 5 rescheduler.go:73] Running Rescheduler
<source>
type tail
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
@ -224,7 +223,7 @@ data:
# Example:
# I0603 15:31:05.793605 6 cluster_manager.go:230] Reading config from path /etc/gce.conf
<source>
type tail
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
@ -238,7 +237,7 @@ data:
# Example:
# I0603 15:31:05.793605 6 cluster_manager.go:230] Reading config from path /etc/gce.conf
<source>
type tail
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
@ -250,8 +249,10 @@ data:
</source>
# Logs from systemd-journal for interesting services.
# TODO(random-liu): Keep this for compatibility, remove this after
# cri container runtime rolls out.
<source>
type systemd
@type systemd
filters [{ "_SYSTEMD_UNIT": "docker.service" }]
pos_file /var/log/gcp-journald-docker.pos
read_from_head true
@ -259,7 +260,15 @@ data:
</source>
<source>
type systemd
@type systemd
filters [{ "_SYSTEMD_UNIT": "{{ container_runtime }}.service" }]
pos_file /var/log/gcp-journald-container-runtime.pos
read_from_head true
tag container-runtime
</source>
<source>
@type systemd
filters [{ "_SYSTEMD_UNIT": "kubelet.service" }]
pos_file /var/log/gcp-journald-kubelet.pos
read_from_head true
@ -267,23 +276,13 @@ data:
</source>
<source>
type systemd
@type systemd
filters [{ "_SYSTEMD_UNIT": "node-problem-detector.service" }]
pos_file /var/log/gcp-journald-node-problem-detector.pos
read_from_head true
tag node-problem-detector
</source>
monitoring.conf: |-
# Prometheus monitoring
<source>
@type prometheus
port 31337
</source>
<source>
@type prometheus_monitor
</source>
# This source is used to acquire the approximate process start timestamp,
# the purpose of which is explained before the corresponding output plugin.
<source>
@ -356,6 +355,8 @@ data:
# Collect metrics in Prometheus registry about plugin activity.
enable_monitoring true
monitoring_type prometheus
# Allow log entries from multiple containers to be sent in the same request.
split_logs_by_tag false
# Set the buffer type to file to improve the reliability and reduce the memory consumption
buffer_type file
buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
@ -376,6 +377,11 @@ data:
disable_retry_limit
# Use multiple threads for processing.
num_threads 2
labels {
# The logging backend will take responsibility for double writing to
# the necessary resource types when this label is set.
"logging.googleapis.com/k8s_compatibility": "true"
}
</match>
# Keep a smaller buffer here since these logs are less important than the user's
@ -386,6 +392,8 @@ data:
detect_json true
enable_monitoring true
monitoring_type prometheus
# Allow entries from multiple system logs to be sent in the same request.
split_logs_by_tag false
detect_subservice false
buffer_type file
buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
@ -396,9 +404,14 @@ data:
max_retry_wait 30
disable_retry_limit
num_threads 2
labels {
# The logging backend will take responsibility for double writing to
# the necessary resource types when this label is set.
"logging.googleapis.com/k8s_compatibility": "true"
}
</match>
metadata:
name: fluentd-gcp-config-v1.2.3
name: fluentd-gcp-config-v1.2.4
namespace: kube-system
labels:
addonmanager.kubernetes.io/mode: Reconcile

View File

@ -1,13 +1,13 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: fluentd-gcp-v2.0.10
name: fluentd-gcp-v3.0.0
namespace: kube-system
labels:
k8s-app: fluentd-gcp
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
version: v2.0.10
version: v3.0.0
spec:
updateStrategy:
type: RollingUpdate
@ -16,27 +16,19 @@ spec:
labels:
k8s-app: fluentd-gcp
kubernetes.io/cluster-service: "true"
version: v2.0.10
version: v3.0.0
# This annotation ensures that fluentd does not get evicted if the node
# supports the critical-pod-annotation-based priority scheme.
# Note that this does not guarantee admission on the nodes (#40573).
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
priorityClassName: system-node-critical
serviceAccountName: fluentd-gcp
dnsPolicy: Default
containers:
- name: fluentd-gcp
image: gcr.io/google-containers/fluentd-gcp:2.0.10
env:
- name: FLUENTD_ARGS
value: --no-supervisor -q
resources:
limits:
memory: 300Mi
requests:
cpu: 100m
memory: 200Mi
image: gcr.io/stackdriver-agents/stackdriver-logging-agent:{{ fluentd_gcp_version }}
volumeMounts:
- name: varlog
mountPath: /var/log
@ -47,7 +39,7 @@ spec:
mountPath: /host/lib
readOnly: true
- name: config-volume
mountPath: /etc/fluent/config.d
mountPath: /etc/google-fluentd/config.d
# Liveness probe is aimed to help in situations where fluentd
# silently hangs for no apparent reason until manually restarted.
# The idea of this probe is that if fluentd is not queueing or
@ -82,12 +74,12 @@ spec:
fi;
# BEGIN_PROMETHEUS_TO_SD
- name: prometheus-to-sd-exporter
image: gcr.io/google-containers/prometheus-to-sd:v0.2.2
image: k8s.gcr.io/prometheus-to-sd:v0.2.4
command:
- /monitor
- --stackdriver-prefix={{ prometheus_to_sd_prefix }}/addons
- --api-override={{ prometheus_to_sd_endpoint }}
- --source=fluentd:http://localhost:31337?whitelisted=stackdriver_successful_requests_count,stackdriver_failed_requests_count,stackdriver_ingested_entries_count,stackdriver_dropped_entries_count
- --source=fluentd:http://localhost:24231?whitelisted=stackdriver_successful_requests_count,stackdriver_failed_requests_count,stackdriver_ingested_entries_count,stackdriver_dropped_entries_count
- --pod-id=$(POD_NAME)
- --namespace-id=$(POD_NAMESPACE)
env:
@ -122,4 +114,4 @@ spec:
path: /usr/lib64
- name: config-volume
configMap:
name: fluentd-gcp-config-v1.2.3
name: fluentd-gcp-config-v1.2.4

View File

@ -0,0 +1,34 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fluentd-gcp-scaler
  namespace: kube-system
  labels:
    k8s-app: fluentd-gcp-scaler
    version: v0.1.0
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  selector:
    matchLabels:
      k8s-app: fluentd-gcp-scaler
  template:
    metadata:
      labels:
        k8s-app: fluentd-gcp-scaler
    spec:
      serviceAccountName: fluentd-gcp-scaler
      containers:
      - name: fluentd-gcp-scaler
        image: gcr.io/google-containers/fluentd-gcp-scaler:0.1
        command:
        - /scaler.sh
        - --ds-name=fluentd-gcp-v3.0.0
        - --scaling-policy=fluentd-gcp-scaling-policy
        env:
        # Defaults, used if no overrides are found in fluentd-gcp-scaling-policy
        - name: CPU_REQUEST
          value: 100m
        - name: MEMORY_REQUEST
          value: 200Mi
        - name: MEMORY_LIMIT
          value: 300Mi

View File

@ -0,0 +1,13 @@
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: scalingpolicies.scalingpolicy.kope.io
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  group: scalingpolicy.kope.io
  version: v1alpha1
  names:
    kind: ScalingPolicy
    plural: scalingpolicies
  scope: Namespaced

View File

@ -0,0 +1,48 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: fluentd-gcp-scaler
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: system:fluentd-gcp-scaler
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
- apiGroups:
  - "extensions"
  resources:
  - daemonsets
  verbs:
  - get
  - patch
- apiGroups:
  - "scalingpolicy.kope.io"
  resources:
  - scalingpolicies
  verbs:
  - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: fluentd-gcp-scaler-binding
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: system:fluentd-gcp-scaler
subjects:
- kind: ServiceAccount
  name: fluentd-gcp-scaler
  namespace: kube-system