#!/bin/bash

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is for master and node instance health monitoring, which is
# packed in kube-manifest tarball. It is executed through a systemd service
# in cluster/gce/gci/<master/node>.yaml. The env variables come from an env
# file provided by the systemd service.
#
# Usage: health-monitor.sh <docker/kubelet>

set -o nounset
set -o pipefail

# We simply kill the process when there is a failure. Another systemd service
# will automatically restart the process.

#######################################
# Polls the docker daemon forever and kills it when it stops responding.
# Globals:
#   SLEEP_SECONDS (read) - pause between two successful probes.
# Arguments:
#   None
# Returns:
#   Never returns; loops until the script is terminated.
#######################################
docker_monitoring() {
  while true; do
    # `timeout` makes a hung `docker ps` count as a failure as well.
    if ! timeout 60 docker ps > /dev/null; then
      echo "Docker daemon failed!"
      pkill docker
      # Wait for a while, as we don't want to kill it again before it is
      # really up.
      sleep 30
    else
      sleep "${SLEEP_SECONDS}"
    fi
  done
}

#######################################
# Polls the kubelet /healthz endpoint forever and kills the kubelet when the
# probe fails.
# Globals:
#   SLEEP_SECONDS (read) - pause between two successful probes.
#   KUBELET_PORT  (read, optional) - healthz port; 10250 when unset or empty.
# Arguments:
#   None
# Returns:
#   Never returns; loops until the script is terminated.
#######################################
kubelet_monitoring() {
  echo "Wait for 2 minutes for kubelet to be fuctional"
  # TODO(andyzheng0831): replace it with a more reliable method if possible.
  sleep 120
  local -r max_seconds=10
  # Declared separately from the assignment below so that the exit status
  # tested by `if` is curl's, not that of `local`.
  local output=""
  while true; do
    if ! output=$(curl --insecure -m "${max_seconds}" -f -s -S \
        "https://127.0.0.1:${KUBELET_PORT:-10250}/healthz" 2>&1); then
      # Print the response and/or errors. Quoted so the text is emitted
      # verbatim instead of being word-split and glob-expanded.
      printf '%s\n' "${output}"
      echo "Kubelet is unhealthy!"
      pkill kubelet
      # Wait for a while, as we don't want to kill it again before it is
      # really up.
      sleep 60
    else
      sleep "${SLEEP_SECONDS}"
    fi
  done
}

############## Main Function ################
if [[ "$#" -ne 1 ]]; then
  echo "Usage: health-monitor.sh <docker/kubelet>"
  exit 1
fi

KUBE_ENV="/home/kubernetes/kube-env"
if [[ ! -e "${KUBE_ENV}" ]]; then
  echo "The ${KUBE_ENV} file does not exist!! Terminate health monitoring"
  exit 1
fi

SLEEP_SECONDS=10
component="$1"
echo "Start kubernetes health monitoring for ${component}"
# Sourced after the default above, so an assignment to SLEEP_SECONDS inside
# the env file takes precedence over it.
source "${KUBE_ENV}"
if [[ "${component}" == "docker" ]]; then
  docker_monitoring
elif [[ "${component}" == "kubelet" ]]; then
  kubelet_monitoring
else
  echo "Health monitoring for component ${component} is not supported!"
fi