#!/usr/bin/env bash
# scripts/phase-06-mgmt-vm.sh  (repository: openstack-caracal-ipv4)
#
# Phase-06 Step 6.2 (MUTATION; allocates a pool FIP): create capi-mgmt-v2
# (gp.large / ubuntu-24.04-noble on capi-mgmt-net), poll ACTIVE, attach a floating IP from
# provider-ext, resolve the tenant (fixed) IP, and persist BOTH to ~/capi-mgmt-net.env
# (the single source for 6.3-6.6 + phase-07; NEITHER value is deterministic per rebuild --
# DOCFIX-038, never hardcode). D-056 flagged-mutation script; human-gated by invocation.
#
# DOCFIX-055: the VM + its FIP must be created IN the capi-mgmt project. admin-openrc scopes the
# token to the ADMIN project, and `server create` has no --project flag (it lands the instance in
# the token's project), so without re-scoping the create cannot see capi-mgmt-sg / capi-mgmt-net
# ("Security group ... not found" -> scheduler ERROR). This script re-scopes to capi-mgmt (the
# D-039 member grant lets admin do so) and PRE-VERIFIES keypair/sg/net are visible in that scope,
# turning the old post-hoc VM ERROR into an upfront precondition. The do-doc 6.2 block omits the
# re-scope -- fix it before Roosevelt.
#
# DOCFIX-054: FIP attach is IDEMPOTENT -- reuse the VM's attached FIP (via its neutron port);
# allocate only when none is present (the do-doc allocated unconditionally -> a re-run would leak).
#
# Tunables via env: VM PROJ PROJ_DOMAIN EXT NET SG KEYPAIR FLAVOR IMAGE ENVFILE POLL_TRIES POLL_SLEEP
#                   RECREATE_ON_ERROR (1 = delete + recreate an existing ERROR-state VM; default 0)
# Requires: jumphost; admin-openrc; openstack; jq; python3; scripts/resolve_tenant_ip.py.
# Usage:  source ~/admin-openrc && bash scripts/phase-06-mgmt-vm.sh
# Exit:   0 VM ACTIVE + FIP attached + env persisted | 1 gate/resolve fail | 2 precondition
# ASCII + LF.

# Strict mode: abort on unhandled errors, on unset variables, and on a failure in any
# pipeline stage.
set -o errexit -o nounset -o pipefail
# Also propagate errexit into command substitutions; tolerated silently when the running
# bash does not know the option.
if ! shopt -s inherit_errexit 2>/dev/null; then
  :
fi
# Absolute directory holding this script; the tenant-IP helper is expected next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
RESOLVE="${SCRIPT_DIR}/resolve_tenant_ip.py"

# Tunables: each keeps a non-empty value supplied by the caller's environment and
# otherwise falls back to the default shown here.
: "${VM:=capi-mgmt-v2}"                   # server name
: "${PROJ:=capi-mgmt}"                    # project the VM + FIP are created in
: "${PROJ_DOMAIN:=capi}"                  # domain of that project
: "${EXT:=provider-ext}"                  # external network the FIP is allocated from
: "${NET:=capi-mgmt-net}"                 # tenant network the VM is attached to
: "${SG:=capi-mgmt-sg}"                   # security group
: "${KEYPAIR:=capi-mgmt-key}"             # nova keypair
: "${FLAVOR:=gp.large}"
: "${IMAGE:=ubuntu-24.04-noble}"
: "${ENVFILE:=$HOME/capi-mgmt-net.env}"   # where MGMT_FIP / MGMT_TENANT_IP are persisted
: "${POLL_TRIES:=40}"                     # max status polls while waiting for ACTIVE
: "${POLL_SLEEP:=15}"                     # seconds between polls

# Preconditions (exit 2): required CLI tools, the tenant-IP helper, and OpenStack credentials.
for tool in openstack jq python3; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "FAIL: $tool not found" >&2
    exit 2
  fi
done

if [ ! -f "$RESOLVE" ]; then
  echo "FAIL: helper $RESOLVE not found" >&2
  exit 2
fi

# Credentials: use what the caller already exported; otherwise fall back to ~/admin-openrc
# when that file exists.
if [ -z "${OS_AUTH_URL:-}" ] && [ -f "$HOME/admin-openrc" ]; then
  # shellcheck disable=SC1091
  source "$HOME/admin-openrc"
fi
if [ -z "${OS_AUTH_URL:-}" ]; then
  echo "FAIL: OS_AUTH_URL unset and no ~/admin-openrc" >&2
  exit 2
fi

# DOCFIX-055: re-scope to the capi-mgmt project (same admin USER; keypair stays user-visible,
# image/flavor are --public, the FIP allocates through capi-mgmt-router's gateway on provider-ext).
export OS_PROJECT_NAME="$PROJ" OS_PROJECT_DOMAIN_NAME="$PROJ_DOMAIN"
# Clear every other project-scope selector the sourced openrc may have exported, not only
# OS_PROJECT_ID: an ID-based project domain or a legacy OS_TENANT_* variable can otherwise
# win over (or conflict with) the name-based scope set above, leaving the token in the wrong
# project or making the re-scope fail for a reason unrelated to the D-039 grant.
# NOTE(review): precedence is decided by the keystoneauth/openstacksdk config loader -- confirm
# against the installed client version.
unset OS_PROJECT_ID OS_PROJECT_DOMAIN_ID OS_TENANT_ID OS_TENANT_NAME
# Probe the scope by issuing a token. stdout (the token itself) is discarded; stderr is kept
# so the real cause can be shown next to the hint when the probe fails.
if ! TOKEN_ERR=$(openstack token issue 2>&1 >/dev/null); then
  echo "FAIL: cannot scope a token to project $PROJ (domain $PROJ_DOMAIN) -- is the D-039 member grant present?" >&2
  [ -z "$TOKEN_ERR" ] || printf '      %s\n' "$TOKEN_ERR" >&2
  exit 2
fi
echo "[OK] scoped to project $PROJ (domain $PROJ_DOMAIN)"

# Pre-create visibility checks (DOCFIX-055): every resource `server create` will reference
# must already resolve in the re-scoped project, so a missing one is reported as an upfront
# precondition failure (exit 2) instead of a 'not found' at create time.
if ! openstack keypair show "$KEYPAIR" >/dev/null 2>&1; then
  echo "FAIL: keypair $KEYPAIR not visible in scope $PROJ" >&2
  exit 2
fi
if ! openstack security group show "$SG" >/dev/null 2>&1; then
  echo "FAIL: security group $SG not visible in scope $PROJ" >&2
  exit 2
fi
if ! openstack network show "$NET" -f value -c id >/dev/null 2>&1; then
  echo "FAIL: network $NET not visible in scope $PROJ" >&2
  exit 2
fi
echo "[OK] keypair + security group + network visible in $PROJ"

# 1. VM verify-or-create (handle a stale ERROR-state instance explicitly)
#
# Existence is decided from ONE `server list` call filtered to an exact name match with jq,
# rather than by treating any `server show` failure as "absent": an API/auth error or a
# duplicate name also makes `server show` fail, and the script would then go on to create
# yet another instance. Listing errors and duplicates are gate failures (exit 1).

# Submit the create request for $VM; the CLI's table output is discarded.
create_vm() {
  openstack server create --image "$IMAGE" --flavor "$FLAVOR" \
    --network "$NET" --security-group "$SG" --key-name "$KEYPAIR" "$VM" >/dev/null
}

# One "<id> <status>" line per listed server whose name is exactly $VM.
VM_ROWS=$(openstack server list -f json \
  | jq -r --arg n "$VM" '.[] | select(.Name == $n) | "\(.ID) \(.Status)"') \
  || { echo "GATE FAIL: cannot list servers (API/auth error?); refusing to create $VM blindly" >&2; exit 1; }

VM_COUNT=0
if [ -n "$VM_ROWS" ]; then
  VM_COUNT=$(printf '%s\n' "$VM_ROWS" | grep -c '')
fi

if [ "$VM_COUNT" -gt 1 ]; then
  echo "GATE FAIL: $VM_COUNT servers are named $VM; remove the duplicates, then re-run:" >&2
  while IFS= read -r VM_ROW; do
    echo "  $VM_ROW" >&2
  done <<<"$VM_ROWS"
  exit 1
elif [ "$VM_COUNT" -eq 1 ]; then
  read -r VM_ID CUR <<<"$VM_ROWS"
  if [ "$CUR" = ERROR ]; then
    if [ "${RECREATE_ON_ERROR:-0}" = 1 ]; then
      echo "[..] $VM is in ERROR; deleting + recreating (RECREATE_ON_ERROR=1)"
      # Delete by ID so exactly the instance inspected above is removed.
      openstack server delete "$VM_ID" --wait
      create_vm
      echo "[OK] $VM recreated"
    else
      echo "GATE FAIL: $VM exists in ERROR state. Delete it, then re-run (or set RECREATE_ON_ERROR=1):" >&2
      echo "  openstack server delete $VM" >&2
      exit 1
    fi
  else
    echo "[SKIP] server $VM exists (status=$CUR)"
  fi
else
  echo "[..] creating $VM ($FLAVOR / $IMAGE on $NET)"
  create_vm
  echo "[OK] $VM create submitted"
fi

# 2. poll ACTIVE (fail fast on ERROR)
#
# Polls the server status up to POLL_TRIES times, POLL_SLEEP seconds apart. A failed status
# query is shown as '?' and simply retried. Gate failures go to stderr and exit 1, matching
# the other GATE FAIL paths in this script.
echo "=== poll $VM -> ACTIVE ==="
ST=""
for ((i = 1; i <= POLL_TRIES; i++)); do
  ST=$(openstack server show "$VM" -f value -c status 2>/dev/null || echo '?')
  echo "[$i] status=$ST"
  case "$ST" in
    ACTIVE) break ;;
    ERROR)
      echo "GATE FAIL: $VM entered ERROR" >&2
      exit 1
      ;;
  esac
  # Sleeping after the final attempt would only delay the failure report.
  if ((i < POLL_TRIES)); then
    sleep "$POLL_SLEEP"
  fi
done
[ "$ST" = ACTIVE ] || { echo "GATE FAIL: $VM not ACTIVE after $POLL_TRIES tries" >&2; exit 1; }

# 3. floating IP -- idempotent (DOCFIX-054): reuse via the VM's port, else allocate + associate
#
# Every lookup failure is an explicit gate failure (stderr, exit 1) so the script never
# allocates on the strength of a listing it could not actually read. The first line of each
# listing is taken with parameter expansion instead of `| head -1`, which keeps the CLI's exit
# status intact under pipefail.
PORTS=$(openstack port list --server "$VM" -f value -c ID) \
  || { echo "GATE FAIL: cannot list neutron ports for $VM" >&2; exit 1; }
PORT=${PORTS%%$'\n'*}
[ -n "$PORT" ] || { echo "GATE FAIL: no neutron port for $VM" >&2; exit 1; }

FIPS=$(openstack floating ip list --port "$PORT" -f value -c "Floating IP Address") \
  || { echo "GATE FAIL: cannot list floating IPs on port $PORT; refusing to allocate blindly" >&2; exit 1; }
FIP=${FIPS%%$'\n'*}

if [ -n "$FIP" ]; then
  echo "[SKIP] $VM already has floating IP $FIP (reusing)"
else
  FIP=$(openstack floating ip create "$EXT" -f value -c floating_ip_address) \
    || { echo "GATE FAIL: FIP allocation on $EXT failed" >&2; exit 1; }
  [ -n "$FIP" ] || { echo "GATE FAIL: FIP allocation returned empty" >&2; exit 1; }
  if ! openstack server add floating ip "$VM" "$FIP"; then
    # The address was allocated by this run but never attached. Left in place it would be
    # invisible to the port-based reuse check above, so the next run would allocate another.
    echo "GATE FAIL: could not associate $FIP with $VM; releasing it so a re-run does not leak" >&2
    openstack floating ip delete "$FIP" \
      || echo "WARN: release failed; remove the orphan manually: openstack floating ip delete $FIP" >&2
    exit 1
  fi
  echo "[OK] allocated + associated FIP $FIP"
fi

# 4. tenant (fixed) IP = the server address that is NOT the FIP (tested .py helper)
#
# The helper receives the server JSON on stdin and the floating IP through the FIP
# environment variable. A non-zero exit from either pipeline stage is folded into the
# empty-result case, so every resolve failure produces the same GATE FAIL message on stderr
# and exit code 1 instead of aborting silently under `set -e`.
TENANT_IP=$(openstack server show "$VM" -f json | FIP="$FIP" python3 "$RESOLVE") || TENANT_IP=""
[ -n "$TENANT_IP" ] || { echo "GATE FAIL: could not resolve tenant IP for $VM" >&2; exit 1; }

# 5. persist both (single source; neither deterministic per rebuild -- DOCFIX-038)
# umask 077 applies to files created from here on; tee writes the two assignments to
# $ENVFILE and echoes them to stdout.
umask 077
{
  printf 'MGMT_FIP=%s\n' "$FIP"
  printf 'MGMT_TENANT_IP=%s\n' "$TENANT_IP"
} | tee "$ENVFILE"

# Final confirmation straight from the API, followed by a one-line summary.
echo "=== confirm ==="
openstack server show "$VM" -c status -c addresses -f value
echo "Summary: $VM ACTIVE; FIP=$FIP TENANT=$TENANT_IP persisted to $ENVFILE"