#!/bin/bash

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -o errexit
set -o nounset
set -o pipefail

# A library of helper functions that each provider hosting Kubernetes
# must implement to use cluster/kube-*.sh scripts.

KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..

# shellcheck source=./config-common.sh
source "${KUBE_ROOT}/cluster/photon-controller/config-common.sh"
# shellcheck source=./config-default.sh
source "${KUBE_ROOT}/cluster/photon-controller/${KUBE_CONFIG_FILE-"config-default.sh"}"
# shellcheck source=../common.sh
source "${KUBE_ROOT}/cluster/common.sh"

readonly PHOTON="photon -n"

# Naming scheme for VMs (masters & nodes)
readonly MASTER_NAME="${INSTANCE_PREFIX}-master"

# shellcheck claims this doesn't work because you can't use a variable in a
# brace range. It does work because we're calling eval.
# shellcheck disable=SC2051
readonly NODE_NAMES=($(eval echo "${INSTANCE_PREFIX}"-node-{1.."${NUM_NODES}"}))
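
# For example (illustrative only), with INSTANCE_PREFIX=kubernetes and NUM_NODES=3,
# the eval/brace expansion above yields:
#   NODE_NAMES=(kubernetes-node-1 kubernetes-node-2 kubernetes-node-3)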

#####################################################################
#
# Public API
#
#####################################################################

#
# detect-master will query Photon Controller for the Kubernetes master.
# It assumes that the VM name for the master is unique.
# It will set KUBE_MASTER_ID to be the VM ID of the master
# It will set KUBE_MASTER_IP to be the IP address of the master
# If the silent parameter is passed, it will not print when the master
# is found: this is used internally just to find the MASTER
#
function detect-master {
  local silent=${1:-""}
  local tenant_args="--tenant ${PHOTON_TENANT} --project ${PHOTON_PROJECT}"

  KUBE_MASTER=${MASTER_NAME}
  KUBE_MASTER_ID=${KUBE_MASTER_ID:-""}
  KUBE_MASTER_IP=${KUBE_MASTER_IP:-""}

  # We don't want silent failure: we check for failure
  set +o pipefail
  if [[ -z ${KUBE_MASTER_ID} ]]; then
    KUBE_MASTER_ID=$(${PHOTON} vm list ${tenant_args} | grep $'\t'"${MASTER_NAME}"$'\t' | awk '{print $1}')
  fi
  if [[ -z ${KUBE_MASTER_ID} ]]; then
    kube::log::error "Could not find Kubernetes master node ID. Make sure you've launched a cluster with 'kube-up.sh'"
    exit 1
  fi

  if [[ -z "${KUBE_MASTER_IP-}" ]]; then
    # Pick out the NICs that have a MAC address owned by VMware (with OUI 00:0C:29)
    # Make sure to ignore lines that have a network interface but no address
    KUBE_MASTER_IP=$(${PHOTON} vm networks "${KUBE_MASTER_ID}" | grep -i $'\t'"00:0C:29" | grep -E '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -1 | awk -F'\t' '{print $3}')
  fi
  if [[ -z "${KUBE_MASTER_IP-}" ]]; then
    kube::log::error "Could not find Kubernetes master node IP. Make sure you've launched a cluster with 'kube-up.sh'" >&2
    exit 1
  fi

  if [[ -z ${silent} ]]; then
    kube::log::status "Master: $KUBE_MASTER ($KUBE_MASTER_IP)"
  fi

  # Reset default set in common.sh
  set -o pipefail
}

#
# detect-nodes will query Photon Controller for the Kubernetes nodes
# It assumes that the VM names for the nodes are unique.
# It assumes that NODE_NAMES has been set
# It will set KUBE_NODE_IP_ADDRESSES to be the VM IPs of the nodes
# It will set the KUBE_NODE_IDS to be the VM IDs of the nodes
# If the silent parameter is passed, it will not print when the nodes
# are found: this is used internally just to find the MASTER
#
function detect-nodes {
  local silent=${1:-""}
  local failure=0
  local tenant_args="--tenant ${PHOTON_TENANT} --project ${PHOTON_PROJECT}"

  KUBE_NODE_IP_ADDRESSES=()
  KUBE_NODE_IDS=()

  # We don't want silent failure: we check for failure
  set +o pipefail
  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do

    local node_id
    node_id=$(${PHOTON} vm list ${tenant_args} | grep $'\t'"${NODE_NAMES[${i}]}"$'\t' | awk '{print $1}')
    if [[ -z ${node_id} ]]; then
      kube::log::error "Could not find ${NODE_NAMES[${i}]}"
      failure=1
    fi
    KUBE_NODE_IDS+=("${node_id}")

    # Pick out the NICs that have a MAC address owned by VMware (with OUI 00:0C:29)
    # Make sure to ignore lines that have a network interface but no address
    local node_ip
    node_ip=$(${PHOTON} vm networks "${node_id}" | grep -i $'\t'"00:0C:29" | grep -E '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -1 | awk -F'\t' '{print $3}')
    KUBE_NODE_IP_ADDRESSES+=("${node_ip}")

    if [[ -z ${silent} ]]; then
      kube::log::status "Node: ${NODE_NAMES[${i}]} (${KUBE_NODE_IP_ADDRESSES[${i}]})"
    fi
  done

  if [[ ${failure} -ne 0 ]]; then
    exit 1
  fi

  # Reset default set in common.sh
  set -o pipefail
}

# Get node names if they are not static.
function detect-node-names {
  echo "TODO: detect-node-names" 1>&2
}

#
# Verifies that this computer has sufficient software installed
# so that it can run the rest of the script.
#
function verify-prereqs {
  verify-cmd-in-path photon
  verify-cmd-in-path ssh
  verify-cmd-in-path scp
  verify-cmd-in-path ssh-add
  verify-cmd-in-path openssl
  verify-cmd-in-path mkisofs
}

#
# The entry point for bringing up a Kubernetes cluster
#
function kube-up {
  verify-prereqs
  verify-ssh-prereqs
  verify-photon-config
  kube::util::ensure-temp-dir

  find-release-tars
  find-image-id

  load-or-gen-kube-basicauth
  gen-cloud-init-iso
  gen-master-start
  create-master-vm
  install-salt-on-master

  gen-node-start
  install-salt-on-nodes

  detect-nodes -s

  install-kubernetes-on-master
  install-kubernetes-on-nodes

  wait-master-api
  wait-node-apis

  setup-pod-routes

  copy-kube-certs
  kube::log::status "Creating kubeconfig..."
  create-kubeconfig
}
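
# Note: these functions are not normally called directly. They are sourced and
# driven by the generic cluster scripts, along the lines of (illustrative):
#
#   KUBERNETES_PROVIDER=photon-controller cluster/kube-up.sh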

# Delete a kubernetes cluster
function kube-down {
  detect-master
  detect-nodes

  pc-delete-vm "${KUBE_MASTER}" "${KUBE_MASTER_ID}"
  for (( node=0; node<${#KUBE_NODE_IDS[@]}; node++)); do
    pc-delete-vm "${NODE_NAMES[${node}]}" "${KUBE_NODE_IDS[${node}]}"
  done
}

# Update a kubernetes cluster
function kube-push {
  echo "TODO: kube-push" 1>&2
}

# Prepare to update a kubernetes component
function prepare-push {
  echo "TODO: prepare-push" 1>&2
}

# Update a kubernetes master
function push-master {
  echo "TODO: push-master" 1>&2
}

# Update a kubernetes node
function push-node {
  echo "TODO: push-node" 1>&2
}

# Execute prior to running tests to build a release if required for env
function test-build-release {
  echo "TODO: test-build-release" 1>&2
}

# Execute prior to running tests to initialize required structure
function test-setup {
  echo "TODO: test-setup" 1>&2
}

# Execute after running tests to perform any required clean-up
function test-teardown {
  echo "TODO: test-teardown" 1>&2
}

#####################################################################
#
# Internal functions
#
#####################################################################

#
# Uses Photon Controller to make a VM
# Takes two parameters:
#   - The name of the VM (Assumed to be unique)
#   - The name of the flavor to create the VM (Assumed to be unique)
#
# It assumes that the variables in config-common.sh (PHOTON_TENANT, etc)
# are set correctly.
#
# It also assumes the cloud-init ISO has been generated
#
# When it completes, it sets two environment variables for use by the
# caller: _VM_ID (the ID of the created VM) and _VM_IP (the IP address
# of the created VM)
#
function pc-create-vm {
  local vm_name="${1}"
  local vm_flavor="${2}"
  local rc=0
  local i=0

  # Create the VM
  local tenant_args="--tenant ${PHOTON_TENANT} --project ${PHOTON_PROJECT}"
  local vm_args="--name ${vm_name} --image ${PHOTON_IMAGE_ID} --flavor ${vm_flavor}"
  local disk_args="disk-1 ${PHOTON_DISK_FLAVOR} boot=true"

  rc=0
  _VM_ID=$(${PHOTON} vm create ${tenant_args} ${vm_args} --disks "${disk_args}" 2>&1) || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "Failed to create VM. Error output:"
    echo "${_VM_ID}"
    exit 1
  fi
  kube::log::status "Created VM ${vm_name}: ${_VM_ID}"

  # Start the VM
  # Note that the VM has cloud-init in it, and we attach an ISO that
  # contains a user-data.txt file for cloud-init. When the VM starts,
  # cloud-init will temporarily mount the ISO and configure the VM.
  # Our user-data will configure the 'kube' user and set up the ssh
  # authorized keys to allow us to ssh to the VM and do further work.
  run-cmd "${PHOTON} vm attach-iso -p ${KUBE_TEMP}/cloud-init.iso ${_VM_ID}"
  run-cmd "${PHOTON} vm start ${_VM_ID}"
  kube::log::status "Started VM ${vm_name}, waiting for network address..."

  # Wait for the VM to be started and connected to the network
  have_network=0
  for i in {1..120}; do
    # 'photon -n vm networks' prints several fields:
    # NETWORK MAC IP GATEWAY CONNECTED?
    # We wait until CONNECTED is True
    rc=0
    networks=$(${PHOTON} vm networks "${_VM_ID}") || rc=$?
    if [[ ${rc} -ne 0 ]]; then
      kube::log::error "'${PHOTON} vm networks ${_VM_ID}' failed. Error output: "
      echo "${networks}"
    fi
    networks=$(echo "${networks}" | grep True) || rc=$?
    if [[ ${rc} -eq 0 ]]; then
      have_network=1
      break;
    fi
    sleep 1
  done

  # Fail if the VM didn't come up
  if [[ ${have_network} -eq 0 ]]; then
    kube::log::error "VM ${vm_name} failed to start up: no IP was found"
    exit 1
  fi

  # Find the IP address of the VM
  _VM_IP=$(${PHOTON} vm networks "${_VM_ID}" | head -1 | awk -F'\t' '{print $3}')
  kube::log::status "VM ${vm_name} has IP: ${_VM_IP}"
}
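
# Example usage (as in create-master-vm below):
#
#   pc-create-vm "${MASTER_NAME}" "${PHOTON_MASTER_FLAVOR}"
#   echo "Created VM ${_VM_ID} with IP ${_VM_IP}"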

#
# Delete one of our VMs
# If it is STARTED, it will be stopped first.
#
function pc-delete-vm {
  local vm_name="${1}"
  local vm_id="${2}"
  local rc=0

  kube::log::status "Deleting VM ${vm_name}"

  # In some cases, head exits before photon, so the pipeline exits with
  # SIGPIPE. We disable the pipefail option to hide that failure.
  set +o pipefail
  ${PHOTON} vm show "${vm_id}" | head -1 | grep STARTED > /dev/null 2>&1 || rc=$?
  set -o pipefail

  if [[ ${rc} -eq 0 ]]; then
    ${PHOTON} vm stop "${vm_id}" > /dev/null 2>&1 || rc=$?
    if [[ ${rc} -ne 0 ]]; then
      kube::log::error "Error: could not stop ${vm_name} ($vm_id)"
      kube::log::error "Please investigate and stop manually"
      return
    fi
  fi

  rc=0
  ${PHOTON} vm delete "${vm_id}" > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "Error: could not delete ${vm_name} ($vm_id)"
    kube::log::error "Please investigate and delete manually"
  fi
}

#
# Looks for the image named PHOTON_IMAGE
# Sets PHOTON_IMAGE_ID to be the id of that image.
# We currently assume there is exactly one image with that name.
#
function find-image-id {
  # The command substitution keeps its output even if the pipeline's exit
  # status is non-zero, so we check for an empty result rather than rc.
  PHOTON_IMAGE_ID=$(${PHOTON} image list | grep $'\t'"${PHOTON_IMAGE}"$'\t' | head -1 | grep READY | awk -F'\t' '{print $1}') || true
  if [[ -z "${PHOTON_IMAGE_ID}" ]]; then
    kube::log::error "Cannot find image \"${PHOTON_IMAGE}\""
    exit 1
  fi
}

#
# Generate an ISO with a single file called user-data.txt
# This ISO will be used to configure cloud-init (which is already
# on the VM). We will tell cloud-init to create the kube user/group
# and give ourselves the ability to ssh to the VM with ssh. We also
# allow people to ssh with the same password that was randomly
# generated for access to Kubernetes as a backup method.
#
# Assumes environment variables:
# - VM_USER
# - KUBE_PASSWORD (randomly generated password)
#
function gen-cloud-init-iso {
  local password_hash
  password_hash=$(openssl passwd -1 "${KUBE_PASSWORD}")

  local ssh_key
  ssh_key=$(ssh-add -L | head -1)

  # Make the user-data file that will be used by cloud-init
  (
    echo "#cloud-config"
    echo ""
    echo "groups:"
    echo "  - ${VM_USER}"
    echo ""
    echo "users:"
    echo "  - name: ${VM_USER}"
    echo "    gecos: Kubernetes"
    echo "    primary-group: ${VM_USER}"
    echo "    lock-passwd: false"
    echo "    passwd: ${password_hash}"
    echo "    ssh-authorized-keys: "
    echo "      - ${ssh_key}"
    echo "    sudo: ALL=(ALL) NOPASSWD:ALL"
    echo "    shell: /bin/bash"
    echo ""
    echo "hostname:"
    echo "  - hostname: kube"
  ) > "${KUBE_TEMP}/user-data.txt"

  # Make the ISO that will contain the user-data
  # The -rock option means that we'll generate real filenames (long and with case)
  run-cmd "mkisofs -rock -o ${KUBE_TEMP}/cloud-init.iso ${KUBE_TEMP}/user-data.txt"
}
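
# Note: the generated ${KUBE_TEMP}/cloud-init.iso is consumed by pc-create-vm,
# which attaches it to each new VM with '${PHOTON} vm attach-iso' so that
# cloud-init picks up user-data.txt on first boot.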

#
# Generate a script used to install salt on the master
# It is placed into $KUBE_TEMP/master-start.sh
#
function gen-master-start {
  python "${KUBE_ROOT}/third_party/htpasswd/htpasswd.py" \
    -b -c "${KUBE_TEMP}/htpasswd" "${KUBE_USER}" "${KUBE_PASSWORD}"
  local htpasswd
  htpasswd=$(cat "${KUBE_TEMP}/htpasswd")

  # This calculation of the service IP should work, but if you choose an
  # alternate subnet, there's a small chance you'd need to modify the
  # service_ip, below. We'll choose an IP like 10.244.240.1 by taking
  # the first three octets of the SERVICE_CLUSTER_IP_RANGE and tacking
  # on a .1
  local octets
  local service_ip
  octets=($(echo "${SERVICE_CLUSTER_IP_RANGE}" | sed -e 's|/.*||' -e 's/\./ /g'))
  ((octets[3]+=1))
  service_ip=$(echo "${octets[*]}" | sed 's/ /./g')
  MASTER_EXTRA_SANS="IP:${service_ip},DNS:${MASTER_NAME},${MASTER_EXTRA_SANS}"

  (
    echo "#! /bin/bash"
    echo "readonly MY_NAME=${MASTER_NAME}"
    grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/hostname.sh"
    echo "cd /home/kube/cache/kubernetes-install"
    echo "readonly KUBE_MASTER_IP='${KUBE_MASTER_IP:-}'"
    echo "readonly MASTER_NAME='${MASTER_NAME}'"
    echo "readonly MASTER_IP_RANGE='${MASTER_IP_RANGE}'"
    echo "readonly INSTANCE_PREFIX='${INSTANCE_PREFIX}'"
    echo "readonly NODE_INSTANCE_PREFIX='${INSTANCE_PREFIX}-node'"
    echo "readonly NODE_IP_RANGES='${NODE_IP_RANGES}'"
    echo "readonly SERVICE_CLUSTER_IP_RANGE='${SERVICE_CLUSTER_IP_RANGE}'"
    echo "readonly ENABLE_NODE_LOGGING='${ENABLE_NODE_LOGGING:-false}'"
    echo "readonly LOGGING_DESTINATION='${LOGGING_DESTINATION:-}'"
    echo "readonly ENABLE_CLUSTER_DNS='${ENABLE_CLUSTER_DNS:-false}'"
    echo "readonly ENABLE_CLUSTER_UI='${ENABLE_CLUSTER_UI:-false}'"
    echo "readonly DNS_SERVER_IP='${DNS_SERVER_IP:-}'"
    echo "readonly DNS_DOMAIN='${DNS_DOMAIN:-}'"
    echo "readonly KUBE_USER='${KUBE_USER:-}'"
    echo "readonly KUBE_PASSWORD='${KUBE_PASSWORD:-}'"
    echo "readonly SERVER_BINARY_TAR='${SERVER_BINARY_TAR##*/}'"
    echo "readonly SALT_TAR='${SALT_TAR##*/}'"
    echo "readonly MASTER_HTPASSWD='${htpasswd}'"
    echo "readonly E2E_STORAGE_TEST_ENVIRONMENT='${E2E_STORAGE_TEST_ENVIRONMENT:-}'"
    echo "readonly MASTER_EXTRA_SANS='${MASTER_EXTRA_SANS:-}'"
    grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/create-dynamic-salt-files.sh"
    grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/install-release.sh"
    grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/salt-master.sh"
  ) > "${KUBE_TEMP}/master-start.sh"
}
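
# Worked example of the service_ip calculation above (illustrative):
# with SERVICE_CLUSTER_IP_RANGE=10.244.240.0/20,
#   octets=(10 244 240 0)  ->  ((octets[3]+=1))  ->  service_ip=10.244.240.1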

#
# Generate the scripts for each node to install salt
#
function gen-node-start {
  local i
  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    (
      echo "#! /bin/bash"
      echo "readonly MY_NAME=${NODE_NAMES[${i}]}"
      grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/hostname.sh"
      echo "KUBE_MASTER=${KUBE_MASTER}"
      echo "KUBE_MASTER_IP=${KUBE_MASTER_IP}"
      echo "NODE_IP_RANGE=$NODE_IP_RANGES"
      grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/salt-minion.sh"
    ) > "${KUBE_TEMP}/node-start-${i}.sh"
  done
}

#
# Create a script that will run on the Kubernetes master and will run salt
# to configure the master. We make it a script instead of just running a
# single ssh command so that we can get logging.
#
function gen-master-salt {
  gen-salt "kubernetes-master"
}

#
# Create scripts that will be run on the Kubernetes master. Each of these
# will invoke salt to configure one of the nodes
#
function gen-node-salt {
  local i
  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    gen-salt "${NODE_NAMES[${i}]}"
  done
}

#
# Shared implementation for gen-master-salt and gen-node-salt
# Writes a script that installs Kubernetes with salt
# The core of the script is simple (run 'salt ... state.highstate')
# We also do a bit of logging so we can debug problems
#
# There is also a funky workaround for an issue with docker 1.9
# (elsewhere we peg ourselves to docker 1.9). It's fixed in 1.10,
# so we should be able to remove it in the future
# https://github.com/docker/docker/issues/18113
# The problem is that sometimes the install (with apt-get) of
# docker fails. Deleting a file and retrying fixes it.
#
# Tell shellcheck to ignore our variables within single quotes:
# We're writing a script, not executing it, so this is normal
# shellcheck disable=SC2016
function gen-salt {
  node_name=${1}
  (
    echo '#!/bin/bash'
    echo ''
    echo "node=${node_name}"
    echo 'out=/tmp/${node}-salt.out'
    echo 'log=/tmp/${node}-salt.log'
    echo ''
    echo 'echo $(date) >> $log'
    echo 'salt ${node} state.highstate -t 30 --no-color > ${out}'
    echo 'grep -E "Failed:[[:space:]]+0" ${out}'
    echo 'success=$?'
    echo 'cat ${out} >> ${log}'
    echo ''
    echo 'if [[ ${success} -ne 0 ]]; then'
    echo '  # Did we try to install docker-engine?'
    echo '  attempted=$(grep docker-engine ${out} | wc -l)'
    echo '  # Is docker-engine installed?'
    echo '  installed=$(salt --output=txt ${node} pkg.version docker-engine | wc -l)'
    echo '  if [[ ${attempted} -ne 0 && ${installed} -eq 0 ]]; then'
    echo '    echo "Unwedging docker-engine install" >> ${log}'
    echo '    salt ${node} cmd.run "rm -f /var/lib/docker/network/files/local-kv.db"'
    echo '  fi'
    echo 'fi'
    echo 'exit ${success}'
  ) > "${KUBE_TEMP}/${node_name}-salt.sh"
}

#
# Generate a script to add a route to a host (master or node)
# The script will do two things:
#   1. Add the route immediately with the route command
#   2. Persist the route by saving it in /etc/network/interfaces
# This was done with a script because it was easier to get the quoting right
# and make it clear.
#
function gen-add-route {
  route=${1}
  gateway=${2}
  (
    echo '#!/bin/bash'
    echo ''
    echo '# Immediately add route'
    echo "sudo route add -net ${route} gw ${gateway}"
    echo ''
    echo '# Persist route so it lasts over restarts'
    echo 'sed -in "s|^iface eth0.*|&\n post-up route add -net' "${route} gw ${gateway}|"'" /etc/network/interfaces'
  ) > "${KUBE_TEMP}/add-route.sh"
}
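
# For example (hypothetical values), 'gen-add-route 10.244.1.0/24 10.244.240.5'
# writes an add-route.sh that runs:
#   sudo route add -net 10.244.1.0/24 gw 10.244.240.5
# and appends a matching 'post-up route add -net ...' line under 'iface eth0'
# in /etc/network/interfaces.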

#
# Create the Kubernetes master VM
# Sets global variables:
# - KUBE_MASTER    (Name)
# - KUBE_MASTER_ID (Photon VM ID)
# - KUBE_MASTER_IP (IP address)
#
function create-master-vm {
  kube::log::status "Starting master VM..."
  pc-create-vm "${MASTER_NAME}" "${PHOTON_MASTER_FLAVOR}"
  KUBE_MASTER=${MASTER_NAME}
  KUBE_MASTER_ID=${_VM_ID}
  KUBE_MASTER_IP=${_VM_IP}
}

#
# Install salt on the Kubernetes master
# Relies on the master-start.sh script created in gen-master-start
#
function install-salt-on-master {
  kube::log::status "Installing salt on master..."
  upload-server-tars "${MASTER_NAME}" "${KUBE_MASTER_IP}"
  run-script-remotely "${KUBE_MASTER_IP}" "${KUBE_TEMP}/master-start.sh"
}

#
# Installs salt on Kubernetes nodes in parallel
# Relies on the node-start script created in gen-node-start
#
function install-salt-on-nodes {
  kube::log::status "Creating nodes and installing salt on them..."

  # Start each of the VMs in parallel
  # In the future, we'll batch this because it doesn't scale well
  # past 10 or 20 nodes
  local node
  for (( node=0; node<${#NODE_NAMES[@]}; node++)); do
    (
      pc-create-vm "${NODE_NAMES[${node}]}" "${PHOTON_NODE_FLAVOR}"
      run-script-remotely "${_VM_IP}" "${KUBE_TEMP}/node-start-${node}.sh"
    ) &
  done

  # Wait for the node VM startups to complete
  local fail=0
  local job
  for job in $(jobs -p); do
    wait "${job}" || fail=$((fail + 1))
  done
  if (( fail != 0 )); then
    kube::log::error "Failed to start ${fail}/${NUM_NODES} nodes"
    exit 1
  fi
}

#
# Install Kubernetes on the master.
# This uses the kubernetes-master-salt.sh script created by gen-master-salt
# That script uses salt to install Kubernetes
#
function install-kubernetes-on-master {
  # Wait until salt-master is running: it may take a bit
  try-until-success-ssh "${KUBE_MASTER_IP}" \
    "Waiting for salt-master to start on ${KUBE_MASTER}" \
    "pgrep salt-master"

  gen-master-salt
  copy-file-to-vm "${_VM_IP}" "${KUBE_TEMP}/kubernetes-master-salt.sh" "/tmp/kubernetes-master-salt.sh"
  try-until-success-ssh "${KUBE_MASTER_IP}" \
    "Installing Kubernetes on ${KUBE_MASTER} via salt" \
    "sudo /bin/bash /tmp/kubernetes-master-salt.sh"
}

#
# Install Kubernetes on the nodes in parallel
# This uses the per-node salt scripts created by gen-node-salt
# Those scripts use salt to install Kubernetes
#
function install-kubernetes-on-nodes {
  gen-node-salt

  # Run in parallel to bring up the cluster faster
  # TODO: Batch this so that we run up to N in parallel, so
  # we don't overload this machine or the salt master
  local node
  for (( node=0; node<${#NODE_NAMES[@]}; node++)); do
    (
      copy-file-to-vm "${_VM_IP}" "${KUBE_TEMP}/${NODE_NAMES[${node}]}-salt.sh" "/tmp/${NODE_NAMES[${node}]}-salt.sh"
      try-until-success-ssh "${KUBE_NODE_IP_ADDRESSES[${node}]}" \
        "Waiting for salt-minion to start on ${NODE_NAMES[${node}]}" \
        "pgrep salt-minion"
      try-until-success-ssh "${KUBE_MASTER_IP}" \
        "Installing Kubernetes on ${NODE_NAMES[${node}]} via salt" \
        "sudo /bin/bash /tmp/${NODE_NAMES[${node}]}-salt.sh"
    ) &
  done

  # Wait for the Kubernetes installations to complete
  local fail=0
  local job
  for job in $(jobs -p); do
    wait "${job}" || fail=$((fail + 1))
  done
  if (( fail != 0 )); then
    kube::log::error "Failed to install Kubernetes on ${fail} out of ${NUM_NODES} nodes"
    exit 1
  fi
}

#
# Upload the Kubernetes tarballs to the master
#
function upload-server-tars {
  vm_name=${1}
  vm_ip=${2}

  run-ssh-cmd "${vm_ip}" "mkdir -p /home/kube/cache/kubernetes-install"

  local tar
  for tar in "${SERVER_BINARY_TAR}" "${SALT_TAR}"; do
    local base_tar
    base_tar=$(basename "${tar}")
    kube::log::status "Uploading ${base_tar} to ${vm_name}..."
    copy-file-to-vm "${vm_ip}" "${tar}" "/home/kube/cache/kubernetes-install/${tar##*/}"
  done
}

#
# Wait for the Kubernetes healthz API to be responsive on the master
#
function wait-master-api {
  local curl_creds="--insecure --user ${KUBE_USER}:${KUBE_PASSWORD}"
  local curl_output="--fail --output /dev/null --silent"
  local curl_net="--max-time 1"

  try-until-success "Waiting for Kubernetes API on ${KUBE_MASTER}" \
    "curl ${curl_creds} ${curl_output} ${curl_net} https://${KUBE_MASTER_IP}/healthz"
}

#
# Wait for the Kubernetes healthz API to be responsive on each node
#
function wait-node-apis {
  local curl_output="--fail --output /dev/null --silent"
  local curl_net="--max-time 1"
  local i

  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    try-until-success "Waiting for Kubernetes API on ${NODE_NAMES[${i}]}..." \
      "curl ${curl_output} ${curl_net} http://${KUBE_NODE_IP_ADDRESSES[${i}]}:10250/healthz"
  done
}

#
# Configure the nodes so the pods can communicate
# Each node will have a bridge named cbr0 for the NODE_IP_RANGES
# defined in config-default.sh. This finds the IP subnet assigned
# (by Kubernetes) to each node and configures routes so the nodes
# can communicate
#
# Also configure the master to be able to talk to the nodes. This is
# useful so that you can get to the UI from the master.
#
function setup-pod-routes {
  local node

  KUBE_NODE_BRIDGE_NETWORK=()
  for (( node=0; node<${#NODE_NAMES[@]}; node++)); do

    # This happens in two steps (wait for an address, wait for a non 172.x.x.x address)
    # because it's both simpler and clearer what's happening.
    try-until-success-ssh "${KUBE_NODE_IP_ADDRESSES[${node}]}" \
      "Waiting for cbr0 bridge on ${NODE_NAMES[${node}]} to have an address" \
      'sudo ifconfig cbr0 | grep -oP "inet addr:\K\S+"'

    try-until-success-ssh "${KUBE_NODE_IP_ADDRESSES[${node}]}" \
      "Waiting for cbr0 bridge on ${NODE_NAMES[${node}]} to have correct address" \
      'sudo ifconfig cbr0 | grep -oP "inet addr:\K\S+" | grep -v "^172."'

    run-ssh-cmd "${KUBE_NODE_IP_ADDRESSES[${node}]}" 'sudo ip route show | grep -E "dev cbr0" | cut -d " " -f1'
    KUBE_NODE_BRIDGE_NETWORK+=(${_OUTPUT})
    kube::log::status "cbr0 on ${NODE_NAMES[${node}]} is ${_OUTPUT}"
  done

  local i
  local j
  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    kube::log::status "Configuring pod routes on ${NODE_NAMES[${i}]}..."

    gen-add-route "${KUBE_NODE_BRIDGE_NETWORK[${i}]}" "${KUBE_NODE_IP_ADDRESSES[${i}]}"
    run-script-remotely "${KUBE_MASTER_IP}" "${KUBE_TEMP}/add-route.sh"

    for (( j=0; j<${#NODE_NAMES[@]}; j++)); do
      if [[ "${i}" != "${j}" ]]; then
        gen-add-route "${KUBE_NODE_BRIDGE_NETWORK[${j}]}" "${KUBE_NODE_IP_ADDRESSES[${j}]}"
        run-script-remotely "${KUBE_NODE_IP_ADDRESSES[${i}]}" "${KUBE_TEMP}/add-route.sh"
      fi
    done
  done
}
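
# For example (hypothetical addresses): if node-1's cbr0 bridge owns 10.244.1.0/24
# and node-1's VM IP is 10.244.240.5, then the master and every other node get
# 'route add -net 10.244.1.0/24 gw 10.244.240.5' via the add-route.sh script above.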

#
# Copy the certificate/key from the Kubernetes master
# These are used to create the kubeconfig file, which allows
# users to use kubectl easily
#
# We also set KUBE_CERT, KUBE_KEY, CA_CERT, and CONTEXT because they
# are needed by create-kubeconfig from common.sh to generate
# the kube config file.
#
function copy-kube-certs {
  local cert="kubecfg.crt"
  local key="kubecfg.key"
  local ca="ca.crt"
  local cert_dir="/srv/kubernetes"

  kube::log::status "Copying credentials from ${KUBE_MASTER}"

  # Set global environment variables: needed by create-kubeconfig
  # in common.sh
  export KUBE_CERT="${KUBE_TEMP}/${cert}"
  export KUBE_KEY="${KUBE_TEMP}/${key}"
  export CA_CERT="${KUBE_TEMP}/${ca}"
  export CONTEXT="photon-${INSTANCE_PREFIX}"

  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 644 ${cert_dir}/${cert}"
  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 644 ${cert_dir}/${key}"
  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 644 ${cert_dir}/${ca}"

  copy-file-from-vm "${KUBE_MASTER_IP}" "${cert_dir}/${cert}" "${KUBE_CERT}"
  copy-file-from-vm "${KUBE_MASTER_IP}" "${cert_dir}/${key}"  "${KUBE_KEY}"
  copy-file-from-vm "${KUBE_MASTER_IP}" "${cert_dir}/${ca}"   "${CA_CERT}"

  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 600 ${cert_dir}/${cert}"
  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 600 ${cert_dir}/${key}"
  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 600 ${cert_dir}/${ca}"
}

#
# Copies a script to a VM and runs it
# Parameters:
#   - IP of VM
#   - Path to local file
#
function run-script-remotely {
  local vm_ip=${1}
  local local_file="${2}"
  local base_file
  local remote_file

  base_file=$(basename "${local_file}")
  remote_file="/tmp/${base_file}"

  copy-file-to-vm "${vm_ip}" "${local_file}" "${remote_file}"
  run-ssh-cmd "${vm_ip}" "chmod 700 ${remote_file}"
  run-ssh-cmd "${vm_ip}" "nohup sudo ${remote_file} < /dev/null 1> ${remote_file}.out 2>&1 &"
}
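
# Example usage (as in install-salt-on-master above):
#
#   run-script-remotely "${KUBE_MASTER_IP}" "${KUBE_TEMP}/master-start.sh"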

#
# Runs a command on a VM using ssh
# Parameters:
#   - (optional) -i to ignore failure
#   - IP address of the VM
#   - Command to run
# Assumes environment variables:
#   - VM_USER
#   - SSH_OPTS
#
function run-ssh-cmd {
  local ignore_failure=""
  if [[ "${1}" = "-i" ]]; then
    ignore_failure="-i"
    shift
  fi

  local vm_ip=${1}
  shift
  local cmd=${1}

  run-cmd ${ignore_failure} "ssh ${SSH_OPTS} $VM_USER@${vm_ip} ${cmd}"
}

#
# Uses scp to copy file to VM
# Parameters:
#   - IP address of the VM
#   - Path to local file
#   - Path to remote file
# Assumes environment variables:
#   - VM_USER
#   - SSH_OPTS
#
function copy-file-to-vm {
  local vm_ip=${1}
  local local_file=${2}
  local remote_file=${3}

  run-cmd "scp ${SSH_OPTS} ${local_file} ${VM_USER}@${vm_ip}:${remote_file}"
}

function copy-file-from-vm {
  local vm_ip=${1}
  local remote_file=${2}
  local local_file=${3}

  run-cmd "scp ${SSH_OPTS} ${VM_USER}@${vm_ip}:${remote_file} ${local_file}"
}

#
# Run a command, print nice error output
# Used by copy-file-to-vm and run-ssh-cmd
#
function run-cmd {
  local rc=0
  local ignore_failure=""
  if [[ "${1}" = "-i" ]]; then
    ignore_failure=${1}
    shift
  fi

  local cmd=$1
  local output
  output=$(${cmd} 2>&1) || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    if [[ -z "${ignore_failure}" ]]; then
      kube::log::error "Failed to run command: ${cmd} Output:"
      echo "${output}"
      exit 1
    fi
  fi
  _OUTPUT=${output}
  return ${rc}
}
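
# Example usage (as in setup-pod-routes above): callers that need the command's
# output read it back from _OUTPUT, e.g.:
#
#   run-ssh-cmd "${vm_ip}" 'sudo ip route show | grep -E "dev cbr0" | cut -d " " -f1'
#   echo "cbr0 subnet is ${_OUTPUT}"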

#
# After the initial VM setup, we use SSH with keys to access the VMs
# This requires an SSH agent, so we verify that it's running
#
function verify-ssh-prereqs {
  kube::log::status "Validating SSH configuration..."
  local rc

  rc=0
  ssh-add -L 1> /dev/null 2> /dev/null || rc=$?
  # "Could not open a connection to your authentication agent."
  if [[ "${rc}" -eq 2 ]]; then
    # ssh agent wasn't running, so start it and ensure we stop it
    eval "$(ssh-agent)" > /dev/null
    trap-add "kill ${SSH_AGENT_PID}" EXIT
  fi

  rc=0
  ssh-add -L 1> /dev/null 2> /dev/null || rc=$?
  # "The agent has no identities."
  if [[ "${rc}" -eq 1 ]]; then
    # Try adding one of the default identities, with or without passphrase.
    ssh-add || true
  fi

  # Expect at least one identity to be available.
  if ! ssh-add -L 1> /dev/null 2> /dev/null; then
    kube::log::error "Could not find or add an SSH identity."
    kube::log::error "Please start ssh-agent, add your identity, and retry."
    exit 1
  fi
}

#
# Verify that Photon Controller has been configured in the way we expect. Specifically
# - Have the flavors been created?
# - Has the image been uploaded?
# TODO: Check the tenant and project as well.
function verify-photon-config {
  kube::log::status "Validating Photon configuration..."

  # We don't want silent failure: we check for failure
  set +o pipefail

  verify-photon-flavors
  verify-photon-image
  verify-photon-tenant

  # Reset default set in common.sh
  set -o pipefail
}

#
# Verify that the VM and disk flavors have been created
#
function verify-photon-flavors {
  local rc=0

  ${PHOTON} flavor list | awk -F'\t' '{print $2}' | grep -q "^${PHOTON_MASTER_FLAVOR}$" > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "ERROR: Cannot find VM flavor named ${PHOTON_MASTER_FLAVOR}"
    exit 1
  fi

  if [[ "${PHOTON_MASTER_FLAVOR}" != "${PHOTON_NODE_FLAVOR}" ]]; then
    rc=0
    ${PHOTON} flavor list | awk -F'\t' '{print $2}' | grep -q "^${PHOTON_NODE_FLAVOR}$" > /dev/null 2>&1 || rc=$?
    if [[ ${rc} -ne 0 ]]; then
      kube::log::error "ERROR: Cannot find VM flavor named ${PHOTON_NODE_FLAVOR}"
      exit 1
    fi
  fi

  ${PHOTON} flavor list | awk -F'\t' '{print $2}' | grep -q "^${PHOTON_DISK_FLAVOR}$" > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "ERROR: Cannot find disk flavor named ${PHOTON_DISK_FLAVOR}"
    exit 1
  fi
}

#
# Verify that we have the image we need, and it's not in error state or
# duplicated
#
function verify-photon-image {
  local rc

  rc=0
  ${PHOTON} image list | grep -q $'\t'"${PHOTON_IMAGE}"$'\t' > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "ERROR: Cannot find image \"${PHOTON_IMAGE}\""
    exit 1
  fi

  rc=0
  ${PHOTON} image list | grep $'\t'"${PHOTON_IMAGE}"$'\t' | grep ERROR > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -eq 0 ]]; then
    echo "Warning: You have at least one ${PHOTON_IMAGE} image in the ERROR state. You may want to investigate."
    echo "Images in the ERROR state will be ignored."
  fi

  rc=0
  num_images=$(${PHOTON} image list | grep $'\t'"${PHOTON_IMAGE}"$'\t' | grep -c READY)
  if [[ "${num_images}" -gt 1 ]]; then
    echo "ERROR: You have more than one READY ${PHOTON_IMAGE} image. Ensure there is only one"
    exit 1
  fi
}

function verify-photon-tenant {
  local rc

  rc=0
  ${PHOTON} tenant list | grep -q $'\t'"${PHOTON_TENANT}" > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    echo "ERROR: Cannot find tenant \"${PHOTON_TENANT}\""
    exit 1
  fi

  ${PHOTON} project list --tenant "${PHOTON_TENANT}" | grep -q $'\t'"${PHOTON_PROJECT}"$'\t' > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    echo "ERROR: Cannot find project \"${PHOTON_PROJECT}\""
    exit 1
  fi
}

#
# Verifies that a given command is in the PATH
#
function verify-cmd-in-path {
  cmd=${1}
  which "${cmd}" >/dev/null || {
    kube::log::error "Can't find ${cmd} in PATH, please install and retry."
    exit 1
  }
}

#
# Repeatedly try a command over ssh until it succeeds or until ten minutes have passed
# The timeout isn't exact, since we assume the command runs instantaneously, and
# it doesn't.
#
function try-until-success-ssh {
  local vm_ip=${1}
  local cmd_description=${2}
  local cmd=${3}
  local timeout=600
  local sleep_time=5
  local max_attempts

  ((max_attempts=timeout/sleep_time))

  kube::log::status "${cmd_description} for up to 10 minutes..."
  local attempt=0
  while true; do
    local rc=0
    run-ssh-cmd -i "${vm_ip}" "${cmd}" || rc=1
    if [[ ${rc} != 0 ]]; then
      if (( attempt == max_attempts )); then
        kube::log::error "Failed, cannot proceed: you may need to retry to log into the VM to debug"
        exit 1
      fi
    else
      break
    fi
    attempt=$((attempt+1))
    sleep ${sleep_time}
  done
}

function try-until-success {
  local cmd_description=${1}
  local cmd=${2}
  local timeout=600
  local sleep_time=5
  local max_attempts

  ((max_attempts=timeout/sleep_time))

  kube::log::status "${cmd_description} for up to 10 minutes..."
  local attempt=0
  while true; do
    local rc=0
    run-cmd -i "${cmd}" || rc=1
    if [[ ${rc} != 0 ]]; then
      if (( attempt == max_attempts )); then
        kube::log::error "Failed, cannot proceed"
        exit 1
      fi
    else
      break
    fi
    attempt=$((attempt+1))
    sleep ${sleep_time}
  done
}

#
# Sets up a trap handler
#
function trap-add {
  local handler="${1}"
  local signal="${2-EXIT}"
  local cur

  cur="$(eval "sh -c 'echo \$3' -- $(trap -p ${signal})")"
  if [[ -n "${cur}" ]]; then
    handler="${cur}; ${handler}"
  fi

  # We want ${handler} to expand now, so tell shellcheck
  # shellcheck disable=SC2064
  trap "${handler}" ${signal}
}
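
# Example usage (as in verify-ssh-prereqs above):
#
#   trap-add "kill ${SSH_AGENT_PID}" EXIT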