mirror of https://github.com/ceph/ceph-csi.git

Fresh dep ensure
136  vendor/k8s.io/kubernetes/cluster/log-dump/log-dump.sh  (generated) (vendored)
@@ -41,20 +41,20 @@ readonly master_ssh_supported_providers="gce aws kubernetes-anywhere"
readonly node_ssh_supported_providers="gce gke aws kubernetes-anywhere"
readonly gcloud_supported_providers="gce gke kubernetes-anywhere"

readonly master_logfiles="kube-apiserver kube-apiserver-audit kube-scheduler rescheduler kube-controller-manager etcd etcd-events glbc cluster-autoscaler kube-addon-manager fluentd"
readonly node_logfiles="kube-proxy fluentd node-problem-detector"
readonly master_logfiles="kube-apiserver.log kube-apiserver-audit.log kube-scheduler.log kube-controller-manager.log etcd.log etcd-events.log glbc.log cluster-autoscaler.log kube-addon-manager.log fluentd.log kubelet.cov"
readonly node_logfiles="kube-proxy.log fluentd.log node-problem-detector.log kubelet.cov"
readonly node_systemd_services="node-problem-detector"
readonly hollow_node_logfiles="kubelet-hollow-node-* kubeproxy-hollow-node-* npd-hollow-node-*"
readonly aws_logfiles="cloud-init-output"
readonly gce_logfiles="startupscript"
readonly kern_logfile="kern"
readonly initd_logfiles="docker"
readonly supervisord_logfiles="kubelet supervisor/supervisord supervisor/kubelet-stdout supervisor/kubelet-stderr supervisor/docker-stdout supervisor/docker-stderr"
readonly hollow_node_logfiles="kubelet-hollow-node-*.log kubeproxy-hollow-node-*.log npd-hollow-node-*.log"
readonly aws_logfiles="cloud-init-output.log"
readonly gce_logfiles="startupscript.log"
readonly kern_logfile="kern.log"
readonly initd_logfiles="docker/log"
readonly supervisord_logfiles="kubelet.log supervisor/supervisord.log supervisor/kubelet-stdout.log supervisor/kubelet-stderr.log supervisor/docker-stdout.log supervisor/docker-stderr.log"
readonly systemd_services="kubelet kubelet-monitor kube-container-runtime-monitor ${LOG_DUMP_SYSTEMD_SERVICES:-docker}"

# Limit the number of concurrent node connections so that we don't run out of
# file descriptors for large clusters.
readonly max_scp_processes=25
readonly max_dump_processes=25

# TODO: Get rid of all the sourcing of bash dependencies eventually.
function setup() {
@@ -100,10 +100,10 @@ function copy-logs-from-node() {
local -r node="${1}"
local -r dir="${2}"
local files=( ${3} )
# Append ".log*"
# Append "*"
# The * at the end is needed to also copy rotated logs (which happens
# in large clusters and long runs).
files=( "${files[@]/%/.log*}" )
files=( "${files[@]/%/*}" )
# Prepend "/var/log/"
files=( "${files[@]/#/\/var\/log\/}" )
# Comma delimit (even the singleton, or scp does the wrong thing), surround by braces.
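
The expansions in this hunk do all of the remote path construction; a minimal standalone sketch of the same bash pattern follows (the file names, the host "node-1", and the destination directory are made up for illustration):

    # Sketch of the suffix/prefix expansions used by copy-logs-from-node.
    files=( "kube-proxy.log" "fluentd.log" "kubelet.cov" )
    files=( "${files[@]/%/*}" )             # append "*" to every element (also matches rotated logs)
    files=( "${files[@]/#/\/var\/log\/}" )  # prepend "/var/log/" to every element
    # Comma-delimit and wrap in braces so one scp call can fetch the whole list.
    printf -v joined '%s,' "${files[@]}"
    echo "scp node-1:{${joined%,}} /tmp/dump/"
    # -> scp node-1:{/var/log/kube-proxy.log*,/var/log/fluentd.log*,/var/log/kubelet.cov*} /tmp/dump/
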
@@ -168,6 +168,21 @@ function save-logs() {
files="${kern_logfile} ${files} ${initd_logfiles} ${supervisord_logfiles}"
fi

# Try dumping coverage profiles, if it looks like coverage is enabled in the first place.
if log-dump-ssh "${node_name}" "stat /var/log/kubelet.cov" &> /dev/null; then
if log-dump-ssh "${node_name}" "command -v docker" &> /dev/null; then
if [[ "${on_master}" == "true" ]]; then
run-in-docker-container "${node_name}" "kube-apiserver" "cat /tmp/k8s-kube-apiserver.cov" > "${dir}/kube-apiserver.cov" || true
run-in-docker-container "${node_name}" "kube-scheduler" "cat /tmp/k8s-kube-scheduler.cov" > "${dir}/kube-scheduler.cov" || true
run-in-docker-container "${node_name}" "kube-controller-manager" "cat /tmp/k8s-kube-controller-manager.cov" > "${dir}/kube-controller-manager.cov" || true
else
run-in-docker-container "${node_name}" "kube-proxy" "cat /tmp/k8s-kube-proxy.cov" > "${dir}/kube-proxy.cov" || true
fi
else
echo "Coverage profiles seem to exist, but cannot be retrieved from inside containers."
fi
fi

echo "Changing logfiles to be world-readable for download"
log-dump-ssh "${node_name}" "sudo chmod -R a+r /var/log" || true

@@ -175,6 +190,15 @@ function save-logs() {
copy-logs-from-node "${node_name}" "${dir}" "${files}"
}

# Execute a command in container $2 on node $1.
# Uses docker because the container may not ordinarily permit direct execution.
function run-in-docker-container() {
local node_name="$1"
local container="$2"
shift 2
log-dump-ssh "${node_name}" "docker exec \"\$(docker ps -f label=io.kubernetes.container.name=${container} --format \"{{.ID}}\")\" $@"
}

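A standalone sketch of the docker exec pattern that run-in-docker-container wraps, run directly on a node rather than through log-dump-ssh; the container name "kube-proxy" is illustrative and a single matching container is assumed:

    # Find the container tagged with a kubernetes container-name label and run a command in it.
    container="kube-proxy"                        # illustrative container name
    cid="$(docker ps -f "label=io.kubernetes.container.name=${container}" --format '{{.ID}}')"
    docker exec "${cid}" cat /tmp/k8s-kube-proxy.cov > kube-proxy.cov
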
function dump_masters() {
local master_names
if [[ -n "${use_custom_instance_list}" ]]; then
@@ -197,23 +221,23 @@ function dump_masters() {
return
fi

proc=${max_scp_processes}
proc=${max_dump_processes}
for master_name in "${master_names[@]}"; do
master_dir="${report_dir}/${master_name}"
mkdir -p "${master_dir}"
save-logs "${master_name}" "${master_dir}" "${master_logfiles}" "" "true" &

# We don't want to run more than ${max_scp_processes} at a time, so
# We don't want to run more than ${max_dump_processes} at a time, so
# wait once we hit that many nodes. This isn't ideal, since one might
# take much longer than the others, but it should help.
proc=$((proc - 1))
if [[ proc -eq 0 ]]; then
proc=${max_scp_processes}
proc=${max_dump_processes}
wait
fi
done
# Wait for any remaining processes.
if [[ proc -gt 0 && proc -lt ${max_scp_processes} ]]; then
if [[ proc -gt 0 && proc -lt ${max_dump_processes} ]]; then
wait
fi
}
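
The proc counter in dump_masters is a simple way to cap the number of concurrent background jobs; a self-contained sketch of the same pattern follows, with the node list and the worker function as stand-ins for the real save-logs calls:

    nodes=( "node-1" "node-2" "node-3" )   # illustrative node list
    some_dump_command() { sleep 1; }       # stand-in for save-logs
    max_dump_processes=25
    proc=${max_dump_processes}
    for node in "${nodes[@]}"; do
      some_dump_command "${node}" &
      proc=$((proc - 1))
      if [[ proc -eq 0 ]]; then            # -eq evaluates the bare name arithmetically
        proc=${max_dump_processes}
        wait                               # block until the current batch finishes
      fi
    done
    # Catch a final, partially filled batch.
    if [[ proc -gt 0 && proc -lt ${max_dump_processes} ]]; then
      wait
    fi
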
@@ -258,7 +282,7 @@ function dump_nodes() {
nodes_selected_for_logs=( "${node_names[@]}" )
fi

proc=${max_scp_processes}
proc=${max_dump_processes}
for node_name in "${nodes_selected_for_logs[@]}"; do
node_dir="${report_dir}/${node_name}"
mkdir -p "${node_dir}"
@@ -266,21 +290,39 @@ function dump_nodes() {
# many nodes.
save-logs "${node_name}" "${node_dir}" "${node_logfiles_all}" "${node_systemd_services}" &

# We don't want to run more than ${max_scp_processes} at a time, so
# We don't want to run more than ${max_dump_processes} at a time, so
# wait once we hit that many nodes. This isn't ideal, since one might
# take much longer than the others, but it should help.
proc=$((proc - 1))
if [[ proc -eq 0 ]]; then
proc=${max_scp_processes}
proc=${max_dump_processes}
wait
fi
done
# Wait for any remaining processes.
if [[ proc -gt 0 && proc -lt ${max_scp_processes} ]]; then
if [[ proc -gt 0 && proc -lt ${max_dump_processes} ]]; then
wait
fi
}

# Collect names of nodes which didn't run logexporter successfully.
# Note: This step is O(#nodes^2) as we check if each node is present in the list of succeeded nodes.
# Making it linear would add code complexity without much benefit (as it just takes ~1s for 5k nodes).
# Assumes:
# NODE_NAMES
# Sets:
# NON_LOGEXPORTED_NODES
function find_non_logexported_nodes() {
succeeded_nodes=$(gsutil ls ${gcs_artifacts_dir}/logexported-nodes-registry) || return 1
echo "Successfully listed marker files for successful nodes"
NON_LOGEXPORTED_NODES=()
for node in "${NODE_NAMES[@]}"; do
if [[ ! "${succeeded_nodes}" =~ "${node}" ]]; then
NON_LOGEXPORTED_NODES+=("${node}")
fi
done
}

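find_non_logexported_nodes treats the gsutil listing as one big string and uses a quoted =~ as a literal substring test; a small sketch of that membership check, with made-up bucket paths and node names:

    # A quoted right-hand side of =~ is matched literally, not as a regex.
    succeeded_nodes="gs://bucket/registry/node-a gs://bucket/registry/node-b"
    NODE_NAMES=( "node-a" "node-b" "node-c" )
    NON_LOGEXPORTED_NODES=()
    for node in "${NODE_NAMES[@]}"; do
      if [[ ! "${succeeded_nodes}" =~ "${node}" ]]; then
        NON_LOGEXPORTED_NODES+=("${node}")
      fi
    done
    echo "${NON_LOGEXPORTED_NODES[@]}"   # -> node-c
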
function dump_nodes_with_logexporter() {
echo "Detecting nodes in the cluster"
detect-node-names &> /dev/null
@@ -312,14 +354,49 @@ function dump_nodes_with_logexporter() {
return
fi

# Give some time for the pods to finish uploading logs.
sleep "${logexport_sleep_seconds}"
# Periodically fetch list of already logexported nodes to verify
# if we aren't already done.
start="$(date +%s)"
while true; do
now="$(date +%s)"
if [[ $((now - start)) -gt ${logexport_sleep_seconds} ]]; then
echo "Waiting for all nodes to be logexported timed out."
break
fi
if find_non_logexported_nodes; then
if [[ -z "${NON_LOGEXPORTED_NODES:-}" ]]; then
break
fi
fi
sleep 15
done

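The fixed sleep is replaced here by a poll-until-done-or-timeout loop; a condensed, self-contained sketch of the same shape, where the probe function and timeout value are stand-ins for find_non_logexported_nodes and logexport_sleep_seconds:

    probe_condition() { [[ -e /tmp/all-done.marker ]]; }   # illustrative stand-in
    timeout_seconds=180
    start="$(date +%s)"
    while true; do
      now="$(date +%s)"
      if [[ $((now - start)) -gt ${timeout_seconds} ]]; then
        echo "Timed out waiting for the condition."
        break
      fi
      if probe_condition; then   # returns 0 once the work is done
        break
      fi
      sleep 15
    done
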
# Store logs from logexporter pods to allow debugging log exporting process
# itself.
proc=${max_dump_processes}
"${KUBECTL}" get pods -n "${logexporter_namespace}" -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.nodeName}{"\n"}{end}' | while read pod node; do
echo "Fetching logs from ${pod} running on ${node}"
mkdir -p ${report_dir}/${node}
"${KUBECTL}" logs -n "${logexporter_namespace}" ${pod} > ${report_dir}/${node}/${pod}.log &

# We don't want to run more than ${max_dump_processes} at a time, so
# wait once we hit that many nodes. This isn't ideal, since one might
# take much longer than the others, but it should help.
proc=$((proc - 1))
if [[ proc -eq 0 ]]; then
proc=${max_dump_processes}
wait
fi
done
# Wait for any remaining processes.
if [[ proc -gt 0 && proc -lt ${max_dump_processes} ]]; then
wait
fi

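The jsonpath expression pairs each logexporter pod with the node it runs on so that pod logs can be filed per node; a small sketch of the same query, assuming a literal "logexporter" namespace in place of ${logexporter_namespace} and only echoing the command it would run:

    # Emit "<pod>\t<node>" for every pod in the namespace, then read the pairs back.
    kubectl get pods -n logexporter \
      -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.nodeName}{"\n"}{end}' |
    while read -r pod node; do
      echo "would run: kubectl logs -n logexporter ${pod} > ${node}/${pod}.log"
    done
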
# List registry of marker files (of nodes whose logexporter succeeded) from GCS.
local nodes_succeeded
for retry in {1..10}; do
if nodes_succeeded=$(gsutil ls ${gcs_artifacts_dir}/logexported-nodes-registry); then
echo "Successfully listed marker files for successful nodes"
if find_non_logexported_nodes; then
break
else
echo "Attempt ${retry} failed to list marker files for succeessful nodes"
@@ -333,16 +410,15 @@ function dump_nodes_with_logexporter() {
fi
done

# Collect names of nodes which didn't run logexporter successfully.
# Note: This step is O(#nodes^2) as we check if each node is present in the list of succeeded nodes.
# Making it linear would add code complexity without much benefit (as it just takes ~1s for 5k nodes).
failed_nodes=()
for node in "${NODE_NAMES[@]}"; do
if [[ ! "${nodes_succeeded}" =~ "${node}" ]]; then
# The following if is needed, because defaulting for empty arrays
# seems to treat them as non-empty with single empty string.
if [[ -n "${NON_LOGEXPORTED_NODES:-}" ]]; then
for node in "${NON_LOGEXPORTED_NODES[@]:-}"; do
echo "Logexporter didn't succeed on node ${node}. Queuing it for logdump through SSH."
failed_nodes+=("${node}")
fi
done
done
fi

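The -n guard added here works around the quirk that the comment describes: expanding an empty array with a ":-" default yields one empty word, so an unguarded loop body would still run once. A small sketch of that behavior (plain bash, no external assumptions):

    set -u
    empty=()
    count=0
    # The ":-" default is substituted for the empty array, producing a single empty word.
    for x in "${empty[@]:-}"; do
      count=$((count + 1))
    done
    echo "${count}"            # -> 1, even though the array has no elements
    if [[ -n "${empty:-}" ]]; then
      echo "would loop"
    else
      echo "skipped"           # -> skipped: the explicit guard avoids the bogus iteration
    fi
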
# Delete the logexporter resources and dump logs for the failed nodes (if any) through SSH.
"${KUBECTL}" get pods --namespace "${logexporter_namespace}" || true
2  vendor/k8s.io/kubernetes/cluster/log-dump/logexporter-daemonset.yaml  (generated) (vendored)
@@ -36,7 +36,7 @@ spec:
spec:
containers:
- name: logexporter-test
image: k8s.gcr.io/logexporter:v0.1.1
image: gcr.io/k8s-testimages/logexporter:v0.1.3
env:
- name: NODE_NAME
valueFrom: