diff --git a/docs/v1-redeploy-changelog.md b/docs/v1-redeploy-changelog.md index b1ed63f..9ed6028 100644 --- a/docs/v1-redeploy-changelog.md +++ b/docs/v1-redeploy-changelog.md @@ -895,8 +895,17 @@ DOCFIX-054 -- do-doc Step 6.2 FIP allocation is NOT idempotent: `openstack floating ip create` runs unconditionally, so a re-run allocates + attaches a SECOND floating IP to capi-mgmt-v2 (FIP-pool - leak + ambiguous apiserver endpoint). The forthcoming phase-06-mgmt-vm.sh (6.2) reuses the VM's - existing FIP when present (allocate only when absent). Fix the do-doc 6.2 block before Roosevelt. + leak + ambiguous apiserver endpoint). scripts/phase-06-mgmt-vm.sh already FIXES this -- it looks up + the VM's neutron port and reuses an attached FIP, allocating only when none is present. (The number + DOCFIX-054 was coined in that script in a prior session; this changelog now records it. Fix the + do-doc 6.2 block before Roosevelt.) + +scripts/phase-06-mgmt-vm.sh + scripts/resolve_tenant_ip.py -- the Step 6.2 VM+FIP mutation + tenant-IP + resolver. Both were untracked from earlier phase-06 prep (present in the working tree, NOT in origin); + committed now WITH a new harness. tests/phase-06-mgmt-vm/ -- resolver unit 5/5 (list shape / + {addr}-dict shape / comma-string shape / only-FIP-empty / no-FIP-first) + integration 6/6 (fresh + create+poll+allocate; idempotent FIP reuse; VM-ERROR abort; never-ACTIVE abort; tenant-unresolvable + abort; missing-auth exit 2) + net.env both-keys-0600 assertion. bash -n + shellcheck clean; ASCII+0CR. ### Next-free numbers Design decision: D-057. Doc fix: DOCFIX-055. 
diff --git a/scripts/phase-06-mgmt-vm.sh b/scripts/phase-06-mgmt-vm.sh
new file mode 100644
index 0000000..1420bab
--- /dev/null
+++ b/scripts/phase-06-mgmt-vm.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+# scripts/phase-06-mgmt-vm.sh
+#
+# Phase-06 Step 6.2 (MUTATION; allocates a pool FIP): create capi-mgmt-v2
+# (gp.large / ubuntu-24.04-noble on capi-mgmt-net), poll ACTIVE, attach a floating IP from
+# provider-ext, resolve the tenant (fixed) IP, and persist BOTH to ~/capi-mgmt-net.env
+# (the single source for 6.3-6.6 + phase-07; NEITHER value is deterministic per rebuild --
+# DOCFIX-038, never hardcode). D-056 flagged-mutation script; human-gated by invocation.
+#
+# DOCFIX-054: FIP attach is IDEMPOTENT here -- if the VM already has a floating IP (looked up
+# via its neutron port), it is REUSED; a new FIP is allocated only when none is attached. The
+# do-doc block allocated unconditionally (a re-run would leak a second FIP). If the associate
+# step itself fails, the script names the allocated-but-unattached FIP so it can be released
+# or attached by hand before a re-run (the port lookup cannot see an unattached FIP).
+#
+# Tunables via env: VM PROJ EXT NET SG KEYPAIR FLAVOR IMAGE ENVFILE POLL_TRIES POLL_SLEEP
+# Requires: jumphost; admin-openrc; openstack; python3; scripts/resolve_tenant_ip.py.
+# Usage: source ~/admin-openrc && bash scripts/phase-06-mgmt-vm.sh
+# Exit: 0 VM ACTIVE + FIP attached + env persisted | 1 gate/resolve fail | 2 precondition
+# Diagnostics (FAIL / GATE FAIL) go to stderr; progress and the persisted values to stdout.
+# ASCII + LF.
+
+set -euo pipefail
+shopt -s inherit_errexit 2>/dev/null || true  # best-effort: shells without the option skip it
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+RESOLVE="$SCRIPT_DIR/resolve_tenant_ip.py"
+
+VM="${VM:-capi-mgmt-v2}"
+PROJ="${PROJ:-capi-mgmt}"
+EXT="${EXT:-provider-ext}"
+NET="${NET:-capi-mgmt-net}"
+SG="${SG:-capi-mgmt-sg}"
+KEYPAIR="${KEYPAIR:-capi-mgmt-key}"
+FLAVOR="${FLAVOR:-gp.large}"
+IMAGE="${IMAGE:-ubuntu-24.04-noble}"
+ENVFILE="${ENVFILE:-$HOME/capi-mgmt-net.env}"
+POLL_TRIES="${POLL_TRIES:-40}"; POLL_SLEEP="${POLL_SLEEP:-15}"
+
+# gate_fail MSG... -- report a failed gate on stderr and exit 1.
+gate_fail() { echo "GATE FAIL: $*" >&2; exit 1; }
+
+for c in openstack python3; do command -v "$c" >/dev/null 2>&1 || { echo "FAIL: $c not found" >&2; exit 2; }; done
+[ -f "$RESOLVE" ] || { echo "FAIL: helper $RESOLVE not found" >&2; exit 2; }
+if [ -z "${OS_AUTH_URL:-}" ] && [ -f "$HOME/admin-openrc" ]; then
+  # shellcheck disable=SC1091
+  . "$HOME/admin-openrc"
+fi
+[ -n "${OS_AUTH_URL:-}" ] || { echo "FAIL: OS_AUTH_URL unset and no ~/admin-openrc" >&2; exit 2; }
+openstack token issue >/dev/null 2>&1 || { echo "FAIL: no scoped token (admin-openrc)" >&2; exit 2; }
+
+# 1. VM verify-or-create
+if openstack server show "$VM" -f value -c id >/dev/null 2>&1; then
+  echo "[SKIP] server $VM exists"
+else
+  echo "[..] creating $VM ($FLAVOR / $IMAGE on $NET)"
+  openstack server create --image "$IMAGE" --flavor "$FLAVOR" \
+    --network "$NET" --security-group "$SG" --key-name "$KEYPAIR" "$VM" >/dev/null
+  echo "[OK] $VM create submitted"
+fi
+
+# 2. poll ACTIVE (fail fast on ERROR); no sleep after the final attempt
+echo "=== poll $VM -> ACTIVE ==="
+ST=""
+for ((i = 1; i <= POLL_TRIES; i++)); do
+  ST=$(openstack server show "$VM" -f value -c status 2>/dev/null || echo '?')
+  echo "[$i] status=$ST"
+  case "$ST" in
+    ACTIVE) break ;;
+    ERROR) gate_fail "$VM entered ERROR" ;;
+  esac
+  if ((i < POLL_TRIES)); then sleep "$POLL_SLEEP"; fi
+done
+[ "$ST" = ACTIVE ] || gate_fail "$VM not ACTIVE after $POLL_TRIES tries"
+
+# 3. floating IP -- idempotent (DOCFIX-054): reuse via the VM's port, else allocate + associate
+PORT=$(openstack port list --server "$VM" -f value -c ID | head -n 1)
+[ -n "$PORT" ] || gate_fail "no neutron port for $VM"
+# stderr stays visible: a failed lookup aborts here (set -e + pipefail) and must say why
+FIP=$(openstack floating ip list --port "$PORT" -f value -c "Floating IP Address" | head -n 1)
+if [ -n "$FIP" ]; then
+  echo "[SKIP] $VM already has floating IP $FIP (reusing)"
+else
+  FIP=$(openstack floating ip create "$EXT" -f value -c floating_ip_address)
+  [ -n "$FIP" ] || gate_fail "FIP allocation returned empty"
+  # An associate failure leaves $FIP allocated but invisible to the port lookup above, so a
+  # blind re-run would allocate a second one -- name it instead of dying silently.
+  openstack server add floating ip "$VM" "$FIP" \
+    || gate_fail "could not associate $FIP with $VM; it is allocated but UNATTACHED -- attach or release it before re-running"
+  echo "[OK] allocated + associated FIP $FIP"
+fi
+
+# 4. tenant (fixed) IP = the server address that is NOT the FIP (tested .py helper)
+TENANT_IP=$(openstack server show "$VM" -f json | FIP="$FIP" python3 "$RESOLVE")
+[ -n "$TENANT_IP" ] || gate_fail "could not resolve tenant IP for $VM"
+
+# 5. persist both (single source; neither deterministic per rebuild -- DOCFIX-038)
+umask 077
+printf 'MGMT_FIP=%s\nMGMT_TENANT_IP=%s\n' "$FIP" "$TENANT_IP" | tee "$ENVFILE"
+chmod 600 "$ENVFILE"  # umask only governs creation; tighten a pre-existing file too
+echo "=== confirm ==="
+openstack server show "$VM" -f value -c status -c addresses
+echo "Summary: $VM ACTIVE; FIP=$FIP TENANT=$TENANT_IP persisted to $ENVFILE"
diff --git a/scripts/resolve_tenant_ip.py b/scripts/resolve_tenant_ip.py
new file mode 100644
index 0000000..e77cf0e
--- /dev/null
+++ b/scripts/resolve_tenant_ip.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+# scripts/resolve_tenant_ip.py
+# stdin: `openstack server show -f json` output ; env FIP=
+# Print the server's tenant (fixed) IP -- the address that is NOT the floating IP.
+# A single-NIC VM has exactly the two once a FIP is attached; before that, the lone
+# fixed IP is returned (FIP unset/empty matches nothing, so the first address wins).
+# Robust to both the OSC value shape {"net": ["ip", ...]} and the dict shape
+# {"net": [{"addr": "ip"}, ...]} and the comma-joined string shape "ip1, ip2".
+import os, json, sys
+
+def addrs_to_ips(addrs):
+    """Flatten an `addresses` mapping into a list of non-empty IP strings."""
+    ips = []
+    for v in (addrs.values() if isinstance(addrs, dict) else ()):  # non-dict payload: no IPs
+        if isinstance(v, str):  # comma-joined string shape
+            v = v.split(",")
+        for item in (v if isinstance(v, list) else ()):
+            if isinstance(item, dict):  # {"addr": "ip"} shape
+                item = str(item.get("addr") or "")
+            if isinstance(item, str):
+                ips.append(item.strip())
+    return [ip for ip in ips if ip]
+
+def main():
+    """Return the first address that differs from $FIP, or "" when none resolves."""
+    try:
+        data = json.load(sys.stdin)
+    except Exception:
+        return ""
+    addrs = data.get("addresses") if isinstance(data, dict) else None  # tolerate [] / null JSON
+    fip = os.environ.get("FIP", "").strip()
+    # "" when every address equals the FIP or there are none; the caller treats "" as a gate fail
+    return next((ip for ip in addrs_to_ips(addrs) if ip != fip), "")
+
+if __name__ == "__main__":
+    print(main())
diff --git a/tests/phase-06-mgmt-vm/fakebin/openstack b/tests/phase-06-mgmt-vm/fakebin/openstack
new file mode 100644
index 0000000..b164614
--- /dev/null
+++ b/tests/phase-06-mgmt-vm/fakebin/openstack
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# tests/phase-06-mgmt-vm/fakebin/openstack -- offline stand-in for the openstack CLI.
+# Env knobs: VM_PRESENT=1 or an existing $MK_VM file => `server show` succeeds;
+#   VM_STATUS (default ACTIVE) => status word; FIP_PRESENT=1 => `floating ip list` prints
+#   $FIPVAL; ADDR_ONLY_FIP=1 => `server show -f json` lists only $FIPVAL (no tenant IP).
+# `server create` touches $MK_VM so later `server show` calls see the VM.
+a1="${1:-}"; a2="${2:-}"; a3="${3:-}"; rest=" $* "
+[ "$a1 $a2" = "token issue" ] && exit 0
+FIPVAL="${FIPVAL:-10.12.5.103}"; TENANT="${TENANT:-10.20.0.107}"
+present_vm() { [ "${VM_PRESENT:-0}" = 1 ] || [ -f "${MK_VM:-/x}" ]; }
+case "$a1 $a2" in
+  "server show")
+    present_vm || exit 1
+    if printf '%s' "$rest" | grep -q -- '-f json'; then
+      if [ "${ADDR_ONLY_FIP:-0}" = 1 ]; then echo "{\"addresses\":{\"capi-mgmt-net\":[\"$FIPVAL\"]}}"
+      else echo "{\"addresses\":{\"capi-mgmt-net\":[\"$TENANT\",\"$FIPVAL\"]}}"; fi
+    elif printf '%s' "$rest" | grep -q -- '-c status'; then
+      echo "${VM_STATUS:-ACTIVE}" # also covers '-c status -c addresses' confirm (status word present)
+    else
+      echo vm-id # -c id existence probe
+    fi
+    exit 0 ;;
+  "server create") : > "${MK_VM:?}"; exit 0 ;;
+  "server add") exit 0 ;; # server add floating ip
+  "port list") echo port-id; exit 0 ;;
+  "floating ip")
+    case "$a3" in
+      list) [ "${FIP_PRESENT:-0}" = 1 ] && echo "$FIPVAL"; exit 0 ;;
+      create) echo "$FIPVAL"; exit 0 ;;
+    esac ;;
+esac
+exit 0
diff --git a/tests/phase-06-mgmt-vm/run-tests.sh 
b/tests/phase-06-mgmt-vm/run-tests.sh
new file mode 100644
index 0000000..2a9b27a
--- /dev/null
+++ b/tests/phase-06-mgmt-vm/run-tests.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# tests/phase-06-mgmt-vm/run-tests.sh -- offline regression for phase-06-mgmt-vm.sh
+# + unit tests of resolve_tenant_ip.py. Fake openstack; real python3.
+# Exit: 0 when every check passes, 1 otherwise (each failure prints an [XX] line).
+set -euo pipefail
+IFS=$'\n\t'
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SCRIPTS="$(cd "$HERE/../../scripts" && pwd)"
+TARGET="$SCRIPTS/phase-06-mgmt-vm.sh"
+RESOLVE="$SCRIPTS/resolve_tenant_ip.py"
+BIN="$HERE/fakebin"
+if [ ! -f "$TARGET" ] || [ ! -f "$RESOLVE" ]; then echo "FAIL: target/helper missing" >&2; exit 1; fi
+chmod +x "$BIN"/* 2>/dev/null || true
+WORK="$(mktemp -d)"; trap 'rm -rf "$WORK"' EXIT
+rc_all=0
+
+echo "=== unit: resolve_tenant_ip.py (FIP via env; three address shapes) ==="
+# u WANT FIP JSON LABEL -- feed JSON to the resolver with FIP in its env; compare stdout to WANT.
+u() { local want="$1" fip="$2" json="$3" label="$4" got
+  # a crashing resolver is recorded as a failed case instead of aborting the run (set -e)
+  got=$(printf '%s' "$json" | FIP="$fip" python3 "$RESOLVE") || got="resolver-exit-$?"
+  if [ "$got" = "$want" ]; then printf ' [OK] %-30s -> %s\n' "$label" "${got:-}"
+  else printf ' [XX] %-30s -> %s (want %s)\n' "$label" "${got:-}" "$want"; rc_all=1; fi
+}
+u 10.20.0.107 10.12.5.103 '{"addresses":{"n":["10.20.0.107","10.12.5.103"]}}' "list shape"
+u 10.20.0.107 10.12.5.103 '{"addresses":{"n":[{"addr":"10.20.0.107"},{"addr":"10.12.5.103"}]}}' "dict {addr} shape"
+u 10.20.0.107 10.12.5.103 '{"addresses":{"n":"10.20.0.107, 10.12.5.103"}}' "comma-string shape"
+u "" 10.12.5.103 '{"addresses":{"n":["10.12.5.103"]}}' "only FIP -> empty"
+u 10.20.0.107 "" '{"addresses":{"n":["10.20.0.107"]}}' "no FIP env -> first"
+
+echo "=== integration: phase-06-mgmt-vm.sh ==="
+# run WANT_RC REGEX LABEL [VAR=VAL...] -- run the target against the fake CLI with the extra
+# env overrides; pass when the exit code equals WANT_RC and the combined output matches REGEX.
+run() {
+  local want="$1" re="$2" label="$3"; shift 3
+  rm -f "$WORK/vm.marker" "$WORK/net.env"
+  local rc
+  set +e
+  PATH="$BIN:$PATH" HOME="$WORK" OS_AUTH_URL=x ENVFILE="$WORK/net.env" \
+    MK_VM="$WORK/vm.marker" POLL_TRIES=2 POLL_SLEEP=0 FIPVAL=10.12.5.103 TENANT=10.20.0.107 \
+    env "$@" bash "$TARGET" >"$WORK/out" 2>&1
+  rc=$?; set -e
+  if [ "$rc" -eq "$want" ] && grep -qE "$re" "$WORK/out"; then
+    printf ' [OK] %-40s exit %s\n' "$label" "$rc"
+  else
+    printf ' [XX] %-40s exit %s (want %s; /%s/)\n' "$label" "$rc" "$want" "$re"
+    sed 's/^/ /' "$WORK/out"; rc_all=1
+  fi
+}
+run 0 'allocated . associated FIP 10.12.5.103' "fresh: create+poll+allocate FIP"
+run 0 'already has floating IP 10.12.5.103 .reusing.' "idempotent: reuse existing FIP" VM_PRESENT=1 FIP_PRESENT=1
+run 1 'entered ERROR' "VM ERROR -> abort" VM_PRESENT=1 VM_STATUS=ERROR
+run 1 'not ACTIVE after' "VM never ACTIVE -> abort" VM_PRESENT=1 VM_STATUS=BUILD
+run 1 'could not resolve tenant IP' "tenant unresolvable -> abort" VM_PRESENT=1 FIP_PRESENT=1 ADDR_ONLY_FIP=1
+run 2 'OS_AUTH_URL unset' "precondition: no auth -> exit 2" OS_AUTH_URL=
+
+echo "=== assert: net.env persisted (both keys) + mode 600 ==="
+rm -f "$WORK/vm.marker" "$WORK/net.env"
+PATH="$BIN:$PATH" HOME="$WORK" OS_AUTH_URL=x ENVFILE="$WORK/net.env" MK_VM="$WORK/vm.marker" \
+  POLL_TRIES=2 POLL_SLEEP=0 FIPVAL=10.12.5.103 TENANT=10.20.0.107 bash "$TARGET" >/dev/null 2>&1 || true
+if grep -q '^MGMT_FIP=10.12.5.103$' "$WORK/net.env" && grep -q '^MGMT_TENANT_IP=10.20.0.107$' "$WORK/net.env"; then
+  # GNU stat first; BSD/macOS stat (-f '%Lp') as the fallback so the check is not Linux-only
+  perm=$(stat -c '%a' "$WORK/net.env" 2>/dev/null || stat -f '%Lp' "$WORK/net.env" 2>/dev/null || echo '?')
+  if [ "$perm" = 600 ]; then echo " [OK] net.env has both keys, mode 600"
+  else echo " [XX] net.env mode=$perm (want 600)"; rc_all=1; fi
+else
+  echo " [XX] net.env missing keys"; sed 's/^/ /' "$WORK/net.env" 2>/dev/null; rc_all=1
+fi
+echo
+if [ "$rc_all" -eq 0 ]; then echo "ALL PASS"; else echo "SOME FAILED"; fi
+exit "$rc_all"