diff --git a/scripts/phase-00-teardown.sh b/scripts/phase-00-teardown.sh
new file mode 100644
index 0000000..eb8120e
--- /dev/null
+++ b/scripts/phase-00-teardown.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+# scripts/phase-00-teardown.sh [--apply] [--no-prompt]
+#
+# Gated teardown for the D-058 reconfigure. Destroys the `openstack` Juju model and
+# deletes the orphaned `capi-mgmt` MAAS machine, so the hosts release to Ready for the
+# re-CIDR/standup/rebuild. HARD-EXCLUDES the management substrate (juju, lxd, tailscale):
+# those system_ids are resolved and asserted OUT of every target set -- the script
+# refuses to run if a target ever resolves to a protected machine.
+#
+# Machine roster (measured from `maas admin machines read`, 2026-06-29):
+# PROTECTED (never touched): juju, lxd, tailscale -- management substrate
+# HOSTS (released to Ready): openstack0, openstack1, openstack2, openstack3
+# ORPHAN (deleted): capi-mgmt -- retired CAPI mgmt machine
+# The in-cloud capi-mgmt-v2 Nova VM is NOT a MAAS machine; it dies with the model destroy.
+#
+# DEFAULT = DRY-RUN AUDIT: resolves every system_id, prints the plan (what is destroyed,
+# what is protected), changes nothing. `--apply` executes; the irreversible destroy-model
+# requires typing the model name at a /dev/tty prompt first (skip with --no-prompt for
+# tested automation). destroy-model releases openstack0-3 to Ready; this script does NOT
+# run a blind machine-release loop (avoids racing Juju's own release).
+#
+# Storage: destroy-model uses --destroy-storage (full teardown; data is discarded -- intended).
+# If destroy hangs on stuck units, re-run with FORCE=1 (adds --force --no-wait).
+#
+# Exit: 0 ok | 1 fatal / unsafe (target intersects substrate, host missing, juju models unreadable) | 2 aborted by operator
+# ASCII + LF.
+
+set -euo pipefail
+shopt -s inherit_errexit 2>/dev/null || true
+
+MAAS_PROFILE="${MAAS_PROFILE:-admin}"
+MODEL="${OPENSTACK_MODEL:-openstack}"
+HOSTS=(openstack0 openstack1 openstack2 openstack3)
+ORPHANS=(capi-mgmt)
+PROTECTED=(juju lxd tailscale)
+
+MODE="dryrun"; PROMPT=1
+for a in "$@"; do
+  case "$a" in
+    --apply) MODE="apply" ;;
+    --no-prompt) PROMPT=0 ;;
+    *) echo "unknown arg: $a" >&2; exit 1 ;;
+  esac
+done
+FATAL=0
+
+# Output helpers: hdr = section banner, note = bullet line, fail = stderr message + FATAL counter bump.
+hdr() { echo; echo "=== $* ==="; }
+note() { echo " - $*"; }
+fail() { echo "FAIL: $*" >&2; FATAL=$((FATAL+1)); }
+command -v jq >/dev/null || { echo "FATAL: jq required" >&2; exit 1; }
+command -v juju >/dev/null || { echo "FATAL: juju not on PATH" >&2; exit 1; }
+
+# maas_json ARGS...: run `maas $MAAS_PROFILE ARGS...` (stderr discarded, non-zero exit tolerated)
+# and print its stdout when jq accepts it as JSON; otherwise print '[]'.
+maas_json() { local o; o="$(maas "$MAAS_PROFILE" "$@" 2>/dev/null || true)"; printf '%s' "$o" | jq empty 2>/dev/null && printf '%s' "$o" || printf '[]'; }
+
+# one read of the machine roster; resolve everything from it
+MACHINES_JSON="$(maas_json machines read)"
+sid_of() { printf '%s' "$MACHINES_JSON" | jq -r --arg h "$1" '.[]|select(.hostname==$h)|.system_id' | head -1; }
+status_of() { printf '%s' "$MACHINES_JSON" | jq -r --arg h "$1" '.[]|select(.hostname==$h)|.status_name' | head -1; }
+
+# emit DESC CMD...: in dry-run, print DESC and the command without running it. In apply, run CMD
+# (stdout+stderr captured); on failure record it via fail, show the first 3 output lines on
+# stderr and return 1. Call as 'emit ... || true' so set -e cannot pre-empt the FATAL checkpoints.
+emit() {
+  local desc="$1"; shift
+  if [ "$MODE" = apply ]; then
+    echo " DO: $desc"
+    local out
+    if ! out="$("$@" 2>&1)"; then
+      fail "$desc"; echo " said: $(printf '%s' "$out" | head -3 | tr '\n' ' ')" >&2; return 1
+    fi
+  else
+    echo " WOULD: $desc"; echo " $*"
+  fi
+}
+
+hdr "teardown audit mode=$MODE model=$MODEL"
+
+# --- resolve protected substrate (must exist; will NOT be touched) ---
+declare -A PROT_SID
+hdr "PROTECTED substrate (never touched)"
+for p in "${PROTECTED[@]}"; do
+  s="$(sid_of "$p")"
+  if [ -z "$s" ]; then note "$p: not in MAAS (already absent) -- nothing to protect"; continue; fi
+  PROT_SID["$s"]="$p"; note "$p = $s (status $(status_of "$p")) -- PROTECTED"
+done
+
+# --- resolve hosts (must exist; released to Ready via destroy-model) ---
+HSID=()
+hdr "HOSTS (released to Ready by destroy-model)"
+for h in "${HOSTS[@]}"; do
+  s="$(sid_of "$h")"
+  if [ -z "$s" ]; then fail "$h: not found in MAAS -- cannot proceed (roster mismatch)"; continue; fi
+  if [ -n "${PROT_SID[$s]:-}" ]; then fail "$h resolves to PROTECTED sid $s (${PROT_SID[$s]}) -- ABORT"; continue; fi
+  HSID+=("$s"); note "$h = $s (status $(status_of "$h"))"
+done
+
+# --- resolve orphans (deleted; absent is fine) ---
+declare -A OSID
+hdr "ORPHANS (deleted from MAAS)"
+for o in "${ORPHANS[@]}"; do
+  s="$(sid_of "$o")"
+  if [ -z "$s" ]; then note "$o: absent -- SKIP (nothing to delete)"; continue; fi
+  if [ -n "${PROT_SID[$s]:-}" ]; then fail "$o resolves to PROTECTED sid $s (${PROT_SID[$s]}) -- ABORT"; continue; fi
+  OSID["$s"]="$o"; note "$o = $s (status $(status_of "$o")) -- DELETE"
+done
+
+# --- model presence: an unreadable model listing is a safety failure, never "absent" ---
+MODEL_PRESENT=0
+if ! MODELS_JSON="$(juju models --format=json 2>/dev/null)" || ! printf '%s' "$MODELS_JSON" | jq -e 'type == "object"' >/dev/null 2>&1; then
+  fail "cannot list juju models (command failed or output is not a JSON object) -- refusing to assume '$MODEL' is absent"
+elif printf '%s' "$MODELS_JSON" | jq -e --arg m "$MODEL" '.models[]?|select(.name==$m or (.name|endswith("/"+$m)))' >/dev/null 2>&1; then
+  MODEL_PRESENT=1; note "juju model '$MODEL' is PRESENT -- will destroy"
+else
+  note "juju model '$MODEL' not present -- destroy will be skipped"
+fi
+
+[ "$FATAL" -eq 0 ] || { echo; echo "ABORT: $FATAL safety failure(s) above -- nothing was changed"; exit 1; }
+
+hdr "PLAN"
+echo " 1) destroy juju model '$MODEL' (--destroy-storage${FORCE:+ --force --no-wait}; releases openstack0-3 to Ready)"
+echo " [$([ "$MODEL_PRESENT" = 1 ] && echo 'present -> will run' || echo 'absent -> skip')]"
+echo " 2) delete orphan MAAS machine(s): ${ORPHANS[*]}"
+echo " 3) verify openstack0-3 reach Ready; confirm substrate untouched"
+echo " PROTECTED (asserted out of all targets): ${PROTECTED[*]}"
+
+if [ "$MODE" = dryrun ]; then
+  echo; echo " re-run with --apply to execute (you will be asked to type the model name)."
+  echo "OK (dryrun)"; exit 0
+fi
+
+# ---- apply: irreversible-destroy confirmation ----
+if [ "$PROMPT" -eq 1 ] && [ "$MODEL_PRESENT" = 1 ]; then
+  printf 'Type the model name "%s" to confirm IRREVERSIBLE destroy: ' "$MODEL" > /dev/tty
+  read -r ans < /dev/tty
+  [ "$ans" = "$MODEL" ] || { echo "aborted (got '$ans', expected '$MODEL') -- nothing changed"; exit 2; }
+fi
+
+hdr "MUTATE 1/3: destroy model"
+# '|| true': a failed emit has already bumped FATAL; without it set -e exits before the STOP guidance below.
+if [ "$MODEL_PRESENT" = 1 ]; then
+  if [ -n "${FORCE:-}" ]; then
+    emit "destroy model $MODEL (FORCE)" juju destroy-model "$MODEL" --destroy-storage --force --no-wait --no-prompt || true
+  else
+    emit "destroy model $MODEL" juju destroy-model "$MODEL" --destroy-storage --no-prompt || true
+  fi
+else note "model absent -- skip"; fi
+[ "$FATAL" -eq 0 ] || { echo; echo "STOP: destroy-model failed -- not deleting orphans. Investigate (FORCE=1 if units are stuck)."; exit 1; }
+
+hdr "MUTATE 2/3: delete orphan machines"
+for s in "${!OSID[@]}"; do emit "delete orphan ${OSID[$s]} ($s)" maas "$MAAS_PROFILE" machine delete "$s" || true; done
+[ "$FATAL" -eq 0 ] || { echo; echo "completed with $FATAL failure(s)"; exit 1; }
+
+hdr "VERIFY 3/3 (read-only): host status + substrate intact"
+MACHINES_JSON="$(maas_json machines read)"
+for h in "${HOSTS[@]}"; do
+  st="$(status_of "$h")"
+  if [ "$st" = "Ready" ]; then note "$h -> $st"
+  else note "$h -> $st (Juju release may still be in progress; re-check, or release manually if it stalls)"; fi
+done
+for p in "${PROTECTED[@]}"; do note "PROTECTED $p -> $(status_of "$p") (unchanged)"; done
+
+echo; echo "next: scripts/phase-00-maas-recidr.sh (audit) -- once openstack0-3 are Ready"
+echo "OK (apply)"
diff --git a/tests/phase-00-teardown/fakebin/juju b/tests/phase-00-teardown/fakebin/juju
new file mode 100644
index 0000000..78d33ee
--- /dev/null
+++ b/tests/phase-00-teardown/fakebin/juju
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# fake juju: models --format=json from fixture; destroy-model succeeds.
+sub="${1:-}"
+case "$sub" in
+  models) cat "${FIX_MODELS:?}"; exit 0 ;;
+  destroy-model) echo "destroying model: $*"; exit 0 ;;
+esac
+echo "{}"; exit 0
diff --git a/tests/phase-00-teardown/fakebin/maas b/tests/phase-00-teardown/fakebin/maas
new file mode 100644
index 0000000..cfbad21
--- /dev/null
+++ b/tests/phase-00-teardown/fakebin/maas
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# fake maas: serves machines read from fixture; machine delete succeeds.
+prof="${1:-}"; obj="${2:-}"; act="${3:-}"; a4="${4:-}"
+case "$obj $act" in
+  "machines read") cat "${FIX_MACHINES:?}"; exit 0 ;;
+  "machine delete") echo "Success."; exit 0 ;;
+  "machine read") jq --arg s "$a4" '.[]|select(.system_id==$s)' "${FIX_MACHINES:?}"; exit 0 ;;
+esac
+echo "{}"; exit 0
diff --git a/tests/phase-00-teardown/make_fixtures.py b/tests/phase-00-teardown/make_fixtures.py
new file mode 100644
index 0000000..93832e5
--- /dev/null
+++ b/tests/phase-00-teardown/make_fixtures.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# Fixtures for phase-00-teardown.sh. Machine roster + sids measured 2026-06-29.
+import json, os
+FIX=os.path.join(os.path.dirname(os.path.abspath(__file__)),"fix"); os.makedirs(FIX,exist_ok=True)
+def m(host,sid,st): return {"hostname":host,"system_id":sid,"status_name":st}
+ROSTER=[
+    m("juju","88fkyf","Deployed"), m("lxd","mfewpp","Deployed"), m("tailscale","rkd3da","Deployed"),
+    m("capi-mgmt","cffxxg","Ready"),
+    m("openstack0","6xey8x","Deployed"), m("openstack1","4667xm","Deployed"),
+    m("openstack2","f6t87k","Deployed"), m("openstack3","htbft7","Deployed"),
+]
+def dump(scn, machines, models):
+    """Write {scn}_machines.json and {scn}_models.json into FIX as indented JSON plus a newline."""
+    payloads = {"machines": machines, "models": {"models": models}}
+    for kind, payload in payloads.items():
+        # 'with' closes each fixture file explicitly instead of relying on garbage collection
+        with open(os.path.join(FIX, f"{scn}_{kind}.json"), "w") as fh:
+            fh.write(json.dumps(payload, indent=2) + "\n")
+MODELS_WITH=[{"name":"admin/openstack"},{"name":"admin/controller"}]
+MODELS_WITHOUT=[{"name":"admin/controller"}]
+dump("live", ROSTER, MODELS_WITH) # normal: model present, all machines
+dump("hostmissing", [x for x in ROSTER if x["hostname"]!="openstack2"], MODELS_WITH) # openstack2 gone
+dump("modelgone", ROSTER, MODELS_WITHOUT) # model already destroyed
+print("fixtures written")
diff --git a/tests/phase-00-teardown/run-tests.sh b/tests/phase-00-teardown/run-tests.sh
new file mode 100644
index 0000000..eabf488
--- /dev/null
+++ b/tests/phase-00-teardown/run-tests.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# Behavior regression for phase-00-teardown.sh. Fake juju + fake maas + real jq.
+set -uo pipefail
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SCRIPT="$(cd "$HERE/../../scripts" && pwd)/phase-00-teardown.sh"
+BIN="$HERE/fakebin"; FIX="$HERE/fix"
+chmod +x "$BIN"/* 2>/dev/null || true
+command -v jq >/dev/null || { echo "FAIL: jq required"; exit 1; }
+python3 "$HERE/make_fixtures.py" >/dev/null || { echo "FAIL: fixture generation failed"; exit 1; }
+rc_all=0; OUT="$(mktemp)" || { echo "FAIL: mktemp failed"; exit 1; }; trap 'rm -f "$OUT"' EXIT
+# run WANT LABEL SCENARIO [ARGS...]: run SCRIPT with the fake binaries first on PATH and the
+# scenario's fixtures, capture stdout+stderr in $OUT, and compare the exit status with WANT.
+run() { local want="$1" label="$2" s="$3"; shift 3
+  PATH="$BIN:$PATH" FIX_MACHINES="$FIX/${s}_machines.json" FIX_MODELS="$FIX/${s}_models.json" \
+    bash "$SCRIPT" "$@" >"$OUT" 2>&1; local rc=$?
+  if [ "$rc" -ne "$want" ]; then printf ' [XX] %-44s exit %s (want %s)\n' "$label" "$rc" "$want"; sed 's/^/ /' "$OUT"; rc_all=1; return 1; fi
+  printf ' [ok] %-44s exit %s\n' "$label" "$rc"; return 0; }
+# has / absent ERE: assert the last captured output does / does not match the pattern.
+has() { grep -qE "$1" "$OUT" || { printf ' MISS /%s/\n' "$1"; rc_all=1; }; }
+absent(){ grep -qE "$1" "$OUT" && { printf ' LEAK /%s/\n' "$1"; rc_all=1; } || true; }
+# assert NONE of the substrate system_ids appear on a "DO:" line
+no_substrate_do(){ if grep -E '^[[:space:]]*DO:' "$OUT" | grep -qE '88fkyf|mfewpp|rkd3da'; then printf ' SUBSTRATE in a DO line!\n'; rc_all=1; fi; }
+
+echo "=== phase-00-teardown.sh -- model destroy + orphan delete, substrate-protected ==="
+run 0 "audit (dry-run): plan + protections" live
+has 'PROTECTED substrate'
+has 'juju = 88fkyf .*PROTECTED'; has 'lxd = mfewpp .*PROTECTED'; has 'tailscale = rkd3da .*PROTECTED'
+has 'openstack0 = 6xey8x'; has 'openstack3 = htbft7'
+has 'capi-mgmt = cffxxg .*DELETE'
+has "model 'openstack' is PRESENT"
+has 'OK \(dryrun\)'
+absent '^[[:space:]]*DO:'
+
+run 0 "apply --no-prompt: destroy + delete orphan" live --apply --no-prompt
+has 'DO: destroy model openstack'
+has 'DO: delete orphan capi-mgmt'
+has 'VERIFY 3/3'
+has 'PROTECTED tailscale -> Deployed \(unchanged\)'
+has 'OK \(apply\)'
+no_substrate_do
+
+run 1 "safety: a host missing -> ABORT" hostmissing --apply --no-prompt
+has 'openstack2: not found in MAAS'
+has 'ABORT'
+absent 'DO: destroy model'
+
+run 0 "idempotent: model already gone -> skip destroy" modelgone --apply --no-prompt
+has "model 'openstack' not present"
+absent 'DO: destroy model openstack'
+has 'DO: delete orphan capi-mgmt'
+has 'OK \(apply\)'
+
+echo
+[ "$rc_all" -eq 0 ] && echo "ALL PASS" || echo "SOME FAILED"
+exit "$rc_all"