#!/usr/bin/env bash
# scripts/phase-06-k8s-bootstrap.sh
#
# Phase-06 Steps 6.3 + 6.4 encapsulated (D-056). Runs on the jumphost; drives the
# in-cloud CAPI management VM over ssh.
# 6.3 GATE 1 -- prove the single-homed VM's egress: it can reach the OpenStack
# public API (the D-035 premise) and the internet (image pulls). The API
# target is the Keystone PUBLIC endpoint -- the as-run literal 10.12.4.50:5000
# (6.3 tagged ENV(keystone-vip)); env-overridable per site via KEYSTONE_HOSTPORT.
# 6.4 -- install k8s-snap on the VM and bootstrap it. The bootstrap config MUST
# carry a cluster-config block (DOCFIX-024 -- without it network+dns are
# disabled and the node never goes Ready). extra-sans MUST be the real
# FIP + tenant IP (from ~/capi-mgmt-net.env, per-rebuild, DOCFIX-038).
#
# One-shot -- matches the as-run 6.4 block verbatim (NO idempotency guard): install +
# bootstrap run unconditionally. Re-run is not safe; purge on the VM first (retry hint
# below), exactly the runbook's documented retry path.
# DOCFIX-021: every remote `sudo` gets </dev/null (remote `bash -s` reads the
# script from stdin, so an unredirected `sudo` would consume the script body).
#
# Tunables via env (all DEFAULT to the as-run values): ENVFILE SSH_KEY CHANNEL POD_CIDR
# SVC_CIDR CLUSTER_NAME KEYSTONE_HOSTPORT INET_PROBE PROBE_TIMEOUT
# BOOT_TIMEOUT READY_TIMEOUT
# Requires: jumphost; ssh + the VM key; ~/capi-mgmt-net.env (from phase-06-mgmt-vm.sh).
# Usage: bash scripts/phase-06-k8s-bootstrap.sh
# Exit: 0 egress gate PASS + k8s ready | 1 gate fail | 2 precondition
# ASCII + LF.
# Strict mode: abort on command errors, on unset variables and on any failed
# pipeline stage.
set -o errexit -o nounset -o pipefail
# inherit_errexit is not available on every bash; its absence is tolerated.
shopt -s inherit_errexit 2>/dev/null || true

# Tunables: a non-empty value from the environment wins, otherwise the default
# given here is assigned.
: "${ENVFILE:=$HOME/capi-mgmt-net.env}"
: "${SSH_KEY:=$HOME/.ssh/id_ed25519}"
: "${CHANNEL:=1.32-classic/stable}"
: "${POD_CIDR:=10.1.0.0/16}"
: "${SVC_CIDR:=10.152.183.0/24}"
: "${CLUSTER_NAME:=capi-mgmt-v2}"
: "${INET_PROBE:=1.1.1.1:443}"
: "${PROBE_TIMEOUT:=6}"
: "${BOOT_TIMEOUT:=10m}"
: "${READY_TIMEOUT:=5m}"
# Shared exit path for precondition failures: message on stderr, exit status 2.
precondition_fail() {
  echo "FAIL: $1" >&2
  exit 2
}

# The ssh client must be on PATH.
if ! command -v ssh >/dev/null 2>&1; then
  precondition_fail "ssh not found"
fi

# The env file must exist before it is sourced.
if [ ! -f "$ENVFILE" ]; then
  precondition_fail "$ENVFILE not found (run phase-06-mgmt-vm.sh first)"
fi
# shellcheck disable=SC1090
. "$ENVFILE"

# The env file is expected to define both addresses; an empty value counts as unset.
if [ -z "${MGMT_FIP:-}" ]; then
  precondition_fail "MGMT_FIP unset in $ENVFILE"
fi
if [ -z "${MGMT_TENANT_IP:-}" ]; then
  precondition_fail "MGMT_TENANT_IP unset in $ENVFILE"
fi

# The private key handed to ssh via -i must be a regular file.
if [ ! -f "$SSH_KEY" ]; then
  precondition_fail "ssh key $SSH_KEY not found"
fi

# The VM is addressed through MGMT_FIP.
MGMT_VM="$MGMT_FIP"

# Non-interactive ssh: no prompts (BatchMode), no host-key verification or
# persistence, and a 10 second connect timeout.
SSH_OPTS=(
  -i "$SSH_KEY"
  -o BatchMode=yes
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
  -o ConnectTimeout=10
)
# --- Keystone public host:port -- the as-run literal (6.3 tagged ENV(keystone-vip));
# env-overridable per site. NOT discovered -- this is the value that ran verbatim. ---
KEYSTONE_HOSTPORT="${KEYSTONE_HOSTPORT:-10.12.4.50:5000}"
# Split both probe targets: host is the text before the first colon, port is the
# text after the last colon.
KHOST="${KEYSTONE_HOSTPORT%%:*}"; KPORT="${KEYSTONE_HOSTPORT##*:}"
IHOST="${INET_PROBE%%:*}"; IPORT="${INET_PROBE##*:}"
# Reject malformed targets up front with exit 2 (precondition), so a typo is not
# reported later as an egress-gate failure. The colon is tested for directly;
# comparing host with port would wrongly reject a target such as "5000:5000".
if [[ "$KEYSTONE_HOSTPORT" != *:* || -z "$KHOST" || -z "$KPORT" ]]; then
  echo "FAIL: bad KEYSTONE_HOSTPORT '$KEYSTONE_HOSTPORT' (want host:port)" >&2; exit 2
fi
if [[ "$INET_PROBE" != *:* || -z "$IHOST" || -z "$IPORT" ]]; then
  echo "FAIL: bad INET_PROBE '$INET_PROBE' (want host:port)" >&2; exit 2
fi
# Announce the endpoint only once it has passed validation.
echo "[OK] Keystone public endpoint: $KEYSTONE_HOSTPORT"
# --- 6.3 GATE 1: VM egress (API VIP + internet) ---
# Opens one TCP connection from the VM to the Keystone host:port and one to the
# internet probe host:port, each bounded by PROBE_TIMEOUT. Exits 1 unless both
# connect.
echo "=== 6.3 GATE 1: VM -> Keystone $KHOST:$KPORT + internet $IHOST:$IPORT ==="
# ssh joins its command words with single spaces and the remote login shell parses
# the result again. Each word is therefore %q-quoted so it reaches `bash -s` as
# exactly one argument (assumes the remote login shell is bash-compatible).
g1_remote_cmd=$(printf '%q ' bash -s "$KHOST" "$KPORT" "$IHOST" "$IPORT" "$PROBE_TIMEOUT")
# `|| true` is deliberate: an ssh or remote failure must not trip errexit here;
# the verdict is taken from the captured output below.
g1=$(ssh "${SSH_OPTS[@]}" ubuntu@"$MGMT_VM" "$g1_remote_cmd" 2>&1 <<'REOF' || true
set -u
khost="$1"; kport="$2"; ihost="$3"; iport="$4"; t="$5"; ok=1
# probe LABEL HOST PORT -- try a TCP connect via bash's /dev/tcp within $t seconds.
# Host and port are passed as positional arguments to the inner bash rather than
# spliced into its command string.
probe() {
  if timeout "$t" bash -c 'exec 3<>"/dev/tcp/$1/$2"' _ "$2" "$3" 2>/dev/null; then
    echo "$1-OK $2:$3"
  else
    echo "$1-FAIL $2:$3"; ok=0
  fi
}
probe VIP "$khost" "$kport"
probe NET "$ihost" "$iport"
if [ "$ok" = 1 ]; then echo "GATE1: PASS"; else echo "GATE1: FAIL"; fi
REOF
)
printf '%s\n' "$g1" | sed 's/^/ /'
# Here-string instead of `printf | grep -q`: with pipefail, grep -q exiting early
# can fail the pipeline through SIGPIPE on the writer.
if ! grep -q 'GATE1: PASS' <<<"$g1"; then
  echo "GATE FAIL: VM egress probe did not pass (see above)" >&2
  exit 1
fi
echo "[OK] GATE 1 passed -- single-NIC VM egress to the OpenStack public API works (D-035 premise)"
# --- 6.4 k8s-snap install + bootstrap ---
# Installs the k8s snap on the VM, writes /root/bootstrap-config.yaml, bootstraps
# the cluster and waits for `k8s status --wait-ready`. Runs unconditionally (no
# idempotency guard). Exits 1 unless the remote script prints "BOOT: READY".
echo "=== 6.4 k8s-snap install + bootstrap ($CHANNEL) ==="
# ssh joins its command words with single spaces and the remote login shell parses
# the result again. Each word is therefore %q-quoted so it reaches `bash -s` as
# exactly one argument (assumes the remote login shell is bash-compatible).
boot_remote_cmd=$(printf '%q ' bash -s "$MGMT_FIP" "$MGMT_TENANT_IP" "$CHANNEL" "$POD_CIDR" "$SVC_CIDR" "$CLUSTER_NAME" "$BOOT_TIMEOUT" "$READY_TIMEOUT")
# `|| true` is deliberate: the remote script runs under `set -e` and may exit
# non-zero; success is decided by the BOOT: READY marker in the captured output.
b=$(ssh "${SSH_OPTS[@]}" ubuntu@"$MGMT_VM" "$boot_remote_cmd" 2>&1 <<'REOF' || true
set -euo pipefail
FIP="$1"; TENANT="$2"; CH="$3"; POD="$4"; SVC="$5"; NAME="$6"; BT="$7"; RT="$8"
# This script arrives on stdin (bash -s), so every sudo below gets its stdin
# redirected (from /dev/null or a here-doc) and cannot consume the script body.
echo "=== install k8s snap $CH ==="
sudo snap install k8s --classic --channel="$CH" </dev/null
echo "=== write bootstrap config (DOCFIX-024: cluster-config block REQUIRED) ==="
# YAML nesting is significant: network/dns sit under cluster-config, each with its
# own `enabled`; pod-cidr, service-cidr and extra-sans are top-level keys.
sudo tee /root/bootstrap-config.yaml >/dev/null <<CFG
cluster-config:
  network:
    enabled: true
  dns:
    enabled: true
pod-cidr: $POD
service-cidr: $SVC
extra-sans:
  - $FIP
  - $TENANT
CFG
sudo cat /root/bootstrap-config.yaml </dev/null
echo "=== bootstrap (timeout $BT) ==="
sudo k8s bootstrap --name "$NAME" --file /root/bootstrap-config.yaml --timeout "$BT" </dev/null
echo "=== status ==="
sudo k8s status --wait-ready --timeout "$RT" </dev/null
echo "BOOT: READY"
REOF
)
printf '%s\n' "$b" | sed 's/^/ /'
# Here-string instead of `printf | grep -q`: with pipefail, grep -q exiting early
# can fail the pipeline through SIGPIPE on the writer.
if ! grep -q 'BOOT: READY' <<<"$b"; then
  echo "GATE FAIL: k8s did not reach ready (see above)." >&2
  echo " Retry on the VM: sudo snap remove k8s --purge </dev/null then re-run this script." >&2
  exit 1
fi
echo "Summary: GATE 1 PASS; k8s ($CHANNEL) bootstrapped and ready on $CLUSTER_NAME (FIP $MGMT_FIP / tenant $MGMT_TENANT_IP)."