#!/usr/bin/env bash
# scripts/phase-04-internal-cert-san-verify.sh [PROBE_UNIT] [MODEL]
#
# Read-only phase-04 gate (DOCFIX-059): confirm every INTERNAL keystone-catalog
# endpoint's TLS cert carries its own metal-internal VIP IP as an IP-SAN. Internal
# certs on this deploy are IP-based (no FQDN SAN -- D-019 / D-021), so service-to-service
# TLS on metal-internal (10.12.12.0/22) validates only if the internal VIP IP is present
# in the cert's subjectAltName.
#
# VANTAGE (the load-bearing correction -- DOCFIX-059): metal-internal is an ISOLATED
# service plane (D-052); the operator jumphost is NOT on it, so an s_client from the
# jumphost to 10.12.12.x times out / connection-errors and yields FALSE "missing SAN"
# negatives. This gate therefore probes FROM a unit that IS on the plane (default
# keystone/leader) via `juju exec`. NEVER run an internal-cert check from the jumphost.
#
# Each probe is bounded by `timeout` (an unbounded s_client can hang ~127s on a filtered
# VIP) and classified: OK / NO-SAN (cert present but missing its own IP) / NO-CERT (no
# cert returned even from the on-plane unit -> a real reachability/listener fault). Non-
# https catalog endpoints (e.g. the plain-HTTP glance-simplestreams image-stream) carry
# no cert and are SKIPPED.
#
# Requires: jumphost; jq; openstack + juju; admin-openrc sourced (OS_AUTH_URL set).
# Usage: source ~/admin-openrc && scripts/phase-04-internal-cert-san-verify.sh [PROBE] [MODEL]
# Env: TIMEOUT_S (per-probe bound, default 6)
# Exit: 0 every internal https endpoint OK | 1 a NO-SAN/NO-CERT (HOLD) | 2 precondition
# ASCII + LF.
# Strict mode: stop on an unhandled command failure, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# Ask command substitutions to inherit errexit too; a bash that does not know the option
# rejects the shopt, and that rejection is deliberately ignored.
shopt -s inherit_errexit 2>/dev/null || :
# Word-split on newline and tab only (never on spaces).
IFS=$'\n\t'

# Directory holding this script, used to locate the helper library next to it.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=scripts/lib-net.sh
source "${SCRIPT_DIR}/lib-net.sh"

# $1: probe unit -- any unit ON metal-internal; keystone/leader is on the plane + has openssl.
PROBE=${1:-keystone/leader}
# $2: juju model that contains the probe unit.
MODEL=${2:-openstack}
# Per-endpoint bound for the TLS probe (a LAN handshake is sub-second); env-overridable,
# falling back to 6 when unset or empty.
: "${TIMEOUT_S:=6}"
# --- preconditions ------------------------------------------------------------------
# Every failure in this section exits 2 (precondition) before any endpoint is probed.
# need_jq is not defined in this file -- presumably supplied by lib-net.sh; confirm there.
need_jq || exit 2
command -v openstack >/dev/null 2>&1 || { echo "FAIL: openstack client not found" >&2; exit 2; }
command -v juju >/dev/null 2>&1 || { echo "FAIL: juju client not found" >&2; exit 2; }
[ -n "${OS_AUTH_URL:-}" ] || { echo "FAIL: OS_AUTH_URL unset -- 'source ~/admin-openrc' first" >&2; exit 2; }
# TIMEOUT_S comes from the environment and is pasted verbatim into the remote shell command
# line that runs the probe, so accept only a timeout(1)-style duration: a number with an
# optional fraction and an optional s/m/h/d suffix. Anything else is rejected here rather
# than being interpreted by the remote shell.
timeout_re='^[0-9]+([.][0-9]+)?[smhd]?$'
[[ "$TIMEOUT_S" =~ $timeout_re ]] \
  || { echo "FAIL: TIMEOUT_S='$TIMEOUT_S' is not a duration (e.g. 6, 0.5, 10s)" >&2; exit 2; }
# Fetch the internal-interface catalog once. The client's stderr and exit status are
# intentionally discarded: any failure leaves EPS_JSON empty or non-JSON, and the jq type
# check right below turns that into a single precondition error.
EPS_JSON="$(openstack endpoint list --interface internal -f json 2>/dev/null || true)"
printf '%s' "$EPS_JSON" | jq -e 'type=="array"' >/dev/null 2>&1 \
  || { echo "FAIL: 'openstack endpoint list --interface internal -f json' did not return JSON" >&2; exit 2; }
# --- probe loop + verdict -----------------------------------------------------------
# For every internal catalog endpoint: skip non-TLS URLs, fetch the cert's SAN text from
# the on-plane probe unit, and classify OK / NO-SAN / NO-CERT / BAD-URL.
# Exit: 0 all checked endpoints OK | 1 at least one failure (HOLD) | 2 nothing checkable.
echo "=== phase-04 internal-cert SAN verify (read-only; probing FROM $PROBE on metal-internal) ==="
echo

# Flatten the catalog to "service<TAB>url" rows BEFORE the loop, so a jq failure is a
# precondition error instead of a silently shortened endpoint list (which could end in a
# false PASS). Rows are not pre-filtered to https: the loop itself reports non-TLS
# endpoints as SKIP. An empty/null service name becomes "-" because tab is IFS whitespace:
# `read` would collapse an empty first field and the endpoint would be dropped unseen.
ROWS="$(printf '%s' "$EPS_JSON" | jq -r '
  .[]
  | [ (.["Service Name"] | if . == null or . == "" then "-" else tostring end),
      (.URL // "" | tostring) ]
  | join("\t")')" \
  || { echo "FAIL: could not extract service/URL rows from the endpoint list" >&2; exit 2; }

# Only a plain host or IPv4 literal with an optional numeric port is accepted as the
# authority part of a catalog URL (see the BAD-URL branch below).
hostport_re='^[A-Za-z0-9._-]+(:[0-9]+)?$'

FATAL=0   # endpoints that failed (NO-SAN / NO-CERT / BAD-URL)
CHECKED=0 # https endpoints actually examined
while IFS=$'\t' read -r svc url; do
  [ -n "$url" ] || continue
  case "$url" in
    https://*) ;;
    *) printf 'SKIP %-12s %s (non-TLS endpoint -- no cert)\n' "$svc" "$url"; continue ;;
  esac
  CHECKED=$((CHECKED + 1))

  # Authority part of the URL: strip the scheme, then everything from the first '/'.
  hp="${url#https://}"; hp="${hp%%/*}"
  # $hp is pasted into a remote shell command line below, so refuse anything that is not
  # a plain host[:port] (this also covers userinfo, IPv6 brackets and shell metacharacters).
  if ! [[ "$hp" =~ $hostport_re ]]; then
    printf 'BAD-URL %-12s %s (unsupported host:port in catalog URL)\n' "$svc" "$url"; FATAL=$((FATAL + 1))
    continue
  fi
  host="${hp%%:*}"
  # s_client -connect needs an explicit port; a URL without one means the https default.
  [ "$hp" != "$host" ] || hp="$host:443"
  # Escape dots so the address is matched literally in the regex below.
  esc="${host//./\\.}"

  # x509 parse is done REMOTELY (inside the bash -c); only the extracted SAN text returns.
  # `|| true` so a nonzero juju/timeout does not trip set -e; empty san -> NO-CERT branch.
  # juju's own stdin is /dev/null so it cannot swallow the rows this loop is reading.
  # NOTE(review): confirm that `juju exec` hands the quoted string to the remote `bash -c`
  # as ONE argument; this invocation is kept exactly as it was.
  san="$(juju exec -m "$MODEL" --unit "$PROBE" -- \
    bash -c "timeout $TIMEOUT_S openssl s_client -connect $hp </dev/null 2>/dev/null | openssl x509 -noout -ext subjectAltName 2>/dev/null" \
    </dev/null 2>/dev/null || true)"

  # Match "IP Address:<host>" followed by end-of-line, comma or space, so that 10.0.0.5
  # does not match 10.0.0.50. A here-string avoids a `printf | grep -q` pipeline, whose
  # writer could fail under pipefail when grep exits early.
  if grep -qE "IP Address:${esc}(\$|[, ])" <<<"$san"; then
    printf 'OK %-12s %s\n' "$svc" "$hp"
  elif [ -z "$san" ]; then
    printf 'NO-CERT %-12s %s (no cert returned from %s)\n' "$svc" "$hp" "$PROBE"; FATAL=$((FATAL + 1))
  else
    printf 'NO-SAN %-12s %s (cert present, missing %s)\n' "$svc" "$hp" "$host"; FATAL=$((FATAL + 1))
    # Show the SAN extension on one line: newlines become spaces, repeated spaces squeezed.
    printf ' SAN: %s\n' "$(printf '%s' "$san" | tr '\n' ' ' | tr -s ' ')"
  fi
done <<<"$ROWS"

echo
if [ "$FATAL" -ne 0 ]; then
  echo "Summary: HOLD/FAIL -- $FATAL internal endpoint(s) failed the SAN check."
  exit 1
fi
# Zero https endpoints means nothing was verified; do not report that as a PASS.
if [ "$CHECKED" -eq 0 ]; then
  echo "FAIL: the catalog lists no internal https endpoints -- nothing was verified" >&2
  exit 2
fi
echo "Summary: PASS -- every internal https endpoint cert carries its own metal-internal IP-SAN."
exit 0