#!/usr/bin/env bash
# scripts/phase-06-mgmt-vm.sh
#
# Phase-06 Step 6.2 (MUTATION; allocates a pool FIP): create capi-mgmt-v2
# (gp.large / ubuntu-24.04-noble on capi-mgmt-net), poll ACTIVE, attach a floating IP from
# provider-ext, resolve the tenant (fixed) IP, and persist BOTH to ~/capi-mgmt-net.env
# (the single source for 6.3-6.6 + phase-07; NEITHER value is deterministic per rebuild --
# DOCFIX-038, never hardcode). D-056 flagged-mutation script; human-gated by invocation.
#
# DOCFIX-055: the VM + its FIP must be created IN the capi-mgmt project. admin-openrc scopes the
# token to the ADMIN project, and `server create` has no --project flag (it lands the instance in
# the token's project), so without re-scoping the create cannot see capi-mgmt-sg / capi-mgmt-net
# ("Security group ... not found" -> scheduler ERROR). This script re-scopes to capi-mgmt (the
# D-039 member grant lets admin do so) and PRE-VERIFIES keypair/sg/net are visible in that scope,
# turning the old post-hoc VM ERROR into an upfront precondition. The do-doc 6.2 block omits the
# re-scope -- fix it before Roosevelt.
#
# DOCFIX-054: FIP attach is IDEMPOTENT -- reuse the VM's attached FIP (via its neutron port);
# allocate only when none is present (the do-doc allocated unconditionally -> a re-run would leak).
#
# Tunables via env: VM PROJ PROJ_DOMAIN EXT NET SG KEYPAIR FLAVOR IMAGE ENVFILE POLL_TRIES POLL_SLEEP
# RECREATE_ON_ERROR (1 = delete + recreate an existing ERROR-state VM; default 0)
# Requires: jumphost; admin-openrc; openstack; jq; python3; scripts/resolve_tenant_ip.py.
# Usage: source ~/admin-openrc && bash scripts/phase-06-mgmt-vm.sh
# Exit: 0 VM ACTIVE + FIP attached + env persisted | 1 gate/resolve fail | 2 precondition
#       (set -e is active: an unguarded command that fails ends the script with that command's status)
# ASCII + LF.
# Strict mode: stop on unhandled errors, unset variables and failed pipeline
# stages. inherit_errexit carries -e into command substitutions; shells that do
# not know the option skip it silently.
set -euo pipefail
shopt -s inherit_errexit 2>/dev/null || true

# Directory holding this script (the tenant-IP helper lives next to it).
# CDPATH is blanked for the cd so an exported CDPATH can neither redirect the
# relative path nor make cd print the directory into the captured value.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
RESOLVE="$SCRIPT_DIR/resolve_tenant_ip.py"

# Tunables: a value already present (and non-empty) in the environment wins.
: "${VM:=capi-mgmt-v2}"
: "${PROJ:=capi-mgmt}"
: "${PROJ_DOMAIN:=capi}"
: "${EXT:=provider-ext}"
: "${NET:=capi-mgmt-net}"
: "${SG:=capi-mgmt-sg}"
: "${KEYPAIR:=capi-mgmt-key}"
: "${FLAVOR:=gp.large}"
: "${IMAGE:=ubuntu-24.04-noble}"
: "${ENVFILE:=$HOME/capi-mgmt-net.env}"
: "${POLL_TRIES:=40}"
: "${POLL_SLEEP:=15}"

# Reject a malformed POLL_TRIES here (exit 2, precondition) instead of letting
# it surface as a misleading gate failure after the VM has been created.
# POLL_SLEEP is handed to sleep(1) untouched, so it is not restricted here.
case "$POLL_TRIES" in
  '' | *[!0-9]*)
    echo "FAIL: POLL_TRIES must be a non-negative integer (got '$POLL_TRIES')" >&2
    exit 2
    ;;
esac

# Required tooling and helper.
for c in openstack jq python3; do
  if ! command -v "$c" >/dev/null 2>&1; then
    echo "FAIL: $c not found" >&2
    exit 2
  fi
done
if [[ ! -f "$RESOLVE" ]]; then
  echo "FAIL: helper $RESOLVE not found" >&2
  exit 2
fi

# Credentials: fall back to ~/admin-openrc only when the caller has not
# already loaded an OpenStack environment.
if [[ -z "${OS_AUTH_URL:-}" && -f "$HOME/admin-openrc" ]]; then
  # shellcheck disable=SC1091
  . "$HOME/admin-openrc"
fi
if [[ -z "${OS_AUTH_URL:-}" ]]; then
  echo "FAIL: OS_AUTH_URL unset and no ~/admin-openrc" >&2
  exit 2
fi
# DOCFIX-055: re-scope to the capi-mgmt project (same admin USER; keypair stays user-visible,
# image/flavor are --public, the FIP allocates through capi-mgmt-router's gateway on provider-ext).
export OS_PROJECT_NAME="$PROJ" OS_PROJECT_DOMAIN_NAME="$PROJ_DOMAIN"
# Clear every other project selector the sourced openrc may have exported:
# keystoneauth prefers project_id / project_domain_id over the *_NAME values
# set above, and OS_TENANT_* are the deprecated aliases of the same settings.
# Leaving any of them behind could keep the token scoped to the wrong project.
unset OS_PROJECT_ID OS_PROJECT_DOMAIN_ID OS_TENANT_ID OS_TENANT_NAME 2>/dev/null || true

# Prove the re-scope works before anything is mutated.
if ! openstack token issue >/dev/null 2>&1; then
  echo "FAIL: cannot scope a token to project $PROJ (domain $PROJ_DOMAIN) -- is the D-039 member grant present?" >&2
  exit 2
fi
echo "[OK] scoped to project $PROJ (domain $PROJ_DOMAIN)"

# Pre-create visibility (turns 'not found at create' into an upfront precondition -- DOCFIX-055)
if ! openstack keypair show "$KEYPAIR" >/dev/null 2>&1; then
  echo "FAIL: keypair $KEYPAIR not visible in scope $PROJ" >&2
  exit 2
fi
if ! openstack security group show "$SG" >/dev/null 2>&1; then
  echo "FAIL: security group $SG not visible in scope $PROJ" >&2
  exit 2
fi
if ! openstack network show "$NET" -f value -c id >/dev/null 2>&1; then
  echo "FAIL: network $NET not visible in scope $PROJ" >&2
  exit 2
fi
echo "[OK] keypair + security group + network visible in $PROJ"
# 1. VM verify-or-create (handle a stale ERROR-state instance explicitly)

# Submit the build request for $VM with the configured image, flavor, network,
# security group and keypair. The CLI's table output is discarded; a failing
# create still aborts the script through set -e.
_create_mgmt_vm() {
  openstack server create --image "$IMAGE" --flavor "$FLAVOR" \
    --network "$NET" --security-group "$SG" --key-name "$KEYPAIR" "$VM" >/dev/null
}

if openstack server show "$VM" -f value -c id >/dev/null 2>&1; then
  # The status read is best-effort; '?' marks a lookup that failed.
  CUR=$(openstack server show "$VM" -f value -c status 2>/dev/null || echo '?')
  if [ "$CUR" != ERROR ]; then
    echo "[SKIP] server $VM exists (status=$CUR)"
  elif [ "${RECREATE_ON_ERROR:-0}" = 1 ]; then
    echo "[..] $VM is in ERROR; deleting + recreating (RECREATE_ON_ERROR=1)"
    openstack server delete "$VM" --wait
    _create_mgmt_vm
    echo "[OK] $VM recreated"
  else
    echo "GATE FAIL: $VM exists in ERROR state. Delete it, then re-run (or set RECREATE_ON_ERROR=1):" >&2
    echo " openstack server delete $VM" >&2
    exit 1
  fi
else
  # 'server show' also fails on API errors and on ambiguous (duplicate) names,
  # so a failure alone does not prove the VM is absent. Confirm absence with
  # an exact-name match over the project's server list before creating, to
  # avoid building a duplicate instance.
  if ! SERVER_LIST_JSON=$(openstack server list -f json); then
    echo "GATE FAIL: 'server show $VM' failed and the servers in $PROJ cannot be listed; refusing to create blind" >&2
    exit 1
  fi
  if ! SAME_NAME=$(jq -r --arg n "$VM" '[.[] | select(.Name == $n)] | length' <<<"$SERVER_LIST_JSON"); then
    echo "GATE FAIL: could not parse the server list while checking for $VM" >&2
    exit 1
  fi
  if [ "$SAME_NAME" != 0 ]; then
    echo "GATE FAIL: $SAME_NAME server(s) named $VM exist in $PROJ but 'server show' failed (duplicate names?); resolve manually" >&2
    exit 1
  fi
  echo "[..] creating $VM ($FLAVOR / $IMAGE on $NET)"
  _create_mgmt_vm
  echo "[OK] $VM create submitted"
fi
# 2. poll ACTIVE (fail fast on ERROR)
# Up to POLL_TRIES status reads, POLL_SLEEP apart. A failed read is logged as
# '?' and retried. Gate failures are reported on stderr, like the other
# FAIL / GATE FAIL messages in this script.
echo "=== poll $VM -> ACTIVE ==="
ST=""
i=1
while [ "$i" -le "$POLL_TRIES" ]; do
  ST=$(openstack server show "$VM" -f value -c status 2>/dev/null || echo '?')
  echo "[$i] status=$ST"
  case "$ST" in
    ACTIVE)
      break
      ;;
    ERROR)
      echo "GATE FAIL: $VM entered ERROR" >&2
      exit 1
      ;;
  esac
  # No sleep after the final attempt: nothing is read afterwards, so waiting
  # would only delay the failure report.
  if [ "$i" -lt "$POLL_TRIES" ]; then
    sleep "$POLL_SLEEP"
  fi
  i=$((i + 1))
done
if [ "$ST" != ACTIVE ]; then
  echo "GATE FAIL: $VM not ACTIVE after $POLL_TRIES tries" >&2
  exit 1
fi
# 3. floating IP -- idempotent (DOCFIX-054): reuse via the VM's port, else allocate + associate
# Lookups are captured whole and the first line taken in-shell, so a failing
# CLI call is reported explicitly instead of ending the script silently.
if ! PORT_IDS=$(openstack port list --server "$VM" -f value -c ID); then
  echo "GATE FAIL: could not list neutron ports for $VM" >&2
  exit 1
fi
PORT=${PORT_IDS%%$'\n'*}
if [ -z "$PORT" ]; then
  echo "GATE FAIL: no neutron port for $VM" >&2
  exit 1
fi

# A failed query must never be read as "no FIP attached": that would allocate
# a second address and leak it.
if ! FIP_ROWS=$(openstack floating ip list --port "$PORT" -f value -c "Floating IP Address"); then
  echo "GATE FAIL: could not query floating IPs on port $PORT; not allocating" >&2
  exit 1
fi
FIP=${FIP_ROWS%%$'\n'*}

if [ -n "$FIP" ]; then
  echo "[SKIP] $VM already has floating IP $FIP (reusing)"
else
  FIP=$(openstack floating ip create "$EXT" -f value -c floating_ip_address)
  if [ -z "$FIP" ]; then
    echo "GATE FAIL: FIP allocation returned empty" >&2
    exit 1
  fi
  if ! openstack server add floating ip "$VM" "$FIP"; then
    # The address was allocated by this run but is not attached to the VM. A
    # re-run would not find it on the port and would allocate another, so
    # release it now.
    echo "GATE FAIL: could not associate FIP $FIP with $VM; releasing it" >&2
    openstack floating ip delete "$FIP" ||
      echo "WARN: release failed; remove it manually: openstack floating ip delete $FIP" >&2
    exit 1
  fi
  echo "[OK] allocated + associated FIP $FIP"
fi
# 4. tenant (fixed) IP = the server address that is NOT the FIP (tested .py helper)
# The helper reads the server JSON on stdin and the floating IP from $FIP.
# A failing pipeline stage and an empty answer are both reported as the same
# gate failure (exit 1) rather than ending the script without a message.
if ! TENANT_IP=$(openstack server show "$VM" -f json | FIP="$FIP" python3 "$RESOLVE") ||
  [ -z "$TENANT_IP" ]; then
  echo "GATE FAIL: could not resolve tenant IP for $VM" >&2
  exit 1
fi

# 5. persist both (single source; neither deterministic per rebuild -- DOCFIX-038)
# Write to a temp file beside ENVFILE, then rename it into place: readers never
# see a half-written file, and the result carries the temp file's fresh
# owner-only mode even when an older ENVFILE had wider permissions.
umask 077
ENV_TMP=$(mktemp "${ENVFILE}.XXXXXX")
if printf 'MGMT_FIP=%s\nMGMT_TENANT_IP=%s\n' "$FIP" "$TENANT_IP" >"$ENV_TMP" &&
  mv -f -- "$ENV_TMP" "$ENVFILE"; then
  # Echo what was persisted.
  cat -- "$ENVFILE"
else
  rm -f -- "$ENV_TMP"
  echo "GATE FAIL: could not write $ENVFILE" >&2
  exit 1
fi

echo "=== confirm ==="
openstack server show "$VM" -f value -c status -c addresses
echo "Summary: $VM ACTIVE; FIP=$FIP TENANT=$TENANT_IP persisted to $ENVFILE"