#!/usr/bin/env bash
# scripts/phase-00-teardown-release.sh [--apply] [--canary] [--no-prompt]
#
# MACHINE-PRESERVING teardown (D-061). Removes the `openstack` model WITHOUT
# decomposing the MAAS pod-composed openstack0-3 machines, so they stay enlisted +
# carved and the next deploy needs NO reenroll/recarve.
#
# WHY THIS EXISTS (D-061, supersedes D-055): on this virsh-pod MAAS, `juju
# destroy-model` decomposes the pod-composed machines REGARDLESS of --destroy-storage
# (observed 3x in one session, incl. a run with neither --destroy-storage nor a release
# flag). D-055's "omit --destroy-storage" diagnosis did NOT hold. The documented
# machine-retention primitive is on `remove-machine`, NOT `destroy-model`:
# Juju 3.6 remove-machine ref: "It is possible to remove a machine from Juju model
# without affecting the corresponding cloud instance by using the --keep-instance
# option." destroy-model has no such option.
# So: `juju remove-machine <id> --keep-instance --force --no-wait` per host FIRST
# (detaches from the model, leaves the MAAS instance intact), THEN destroy-model with
# only the controller relationship + LXD containers left to clean up.
#
# !!! UNVALIDATED ON THIS VIRSH-POD MAAS at first authorship. --keep-instance is
# DOCUMENTED but had not been exercised here when this script was written. Run
# --canary FIRST (removes ONLY openstack0, hard-verifies it survived in MAAS, then
# STOPS) before trusting the all-four path. The script HARD-VERIFIES survival after
# every remove and FAILS LOUD (exit 1) if any host decomposed.
#
# Roster (resolved live from `maas admin machines read`):
# PROTECTED (never touched): juju, lxd, tailscale -- management substrate
# HOSTS (kept in MAAS): openstack0-3
# ORPHAN (deleted if present): capi-mgmt
#
# DEFAULT = DRY-RUN AUDIT (resolves sids, prints plan, changes nothing).
# --apply execute. Typed-approval gate (model name) before any mutation.
# --canary with --apply: do ONLY openstack0 (remove --keep-instance + verify), STOP.
# --no-prompt skip the typed gate (tested automation only).
#
# Exit: 0 ok | 1 fatal/unsafe (target intersects substrate; host decomposed; destroy
# failed) | 2 aborted by operator. ASCII + LF.
# Strict mode: stop on command errors, on unset variables, and on a failure in
# any stage of a pipeline.
set -o errexit -o nounset -o pipefail
# inherit_errexit is not known to every bash; a failure to enable it is ignored.
shopt -s inherit_errexit 2>/dev/null || true

# Settings that can be overridden from the environment.
: "${MAAS_PROFILE:=admin}"
MODEL="${OPENSTACK_MODEL:-openstack}"

# Machine rosters, by MAAS hostname.
HOSTS=(openstack{0..3})
ORPHANS=(capi-mgmt)
PROTECTED=(juju lxd tailscale)

# Run-mode flags; the argument parser may change these.
MODE="dryrun"
PROMPT=1
CANARY=0
# Walk the command-line flags; anything unrecognised is reported on stderr and
# ends the script with exit 1.
for arg; do
  case "$arg" in
    --apply)
      MODE="apply"
      ;;
    --canary)
      CANARY=1
      ;;
    --no-prompt)
      PROMPT=0
      ;;
    *)
      echo "unknown arg: $arg" >&2
      exit 1
      ;;
  esac
done
# Number of failures recorded so far through fail().
FATAL=0

# hdr TEXT... -- print an empty line, then TEXT wrapped as a "===" banner.
hdr() {
  printf '\n=== %s ===\n' "$*"
}

# note TEXT... -- print TEXT as a dash-prefixed detail line on stdout.
note() {
  printf ' - %s\n' "$*"
}

# fail TEXT... -- print "FAIL: TEXT" on stderr and add one to FATAL.
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  FATAL=$((FATAL + 1))
}
# Check the external tools up front and stop with exit 1 if one is missing.
# maas is checked as well: maas_json falls back to an empty machine list when
# the maas call fails, so a missing binary would otherwise show up later as a
# misleading "not in MAAS -- roster mismatch" for every host.
command -v jq >/dev/null || { echo "FATAL: jq required" >&2; exit 1; }
command -v juju >/dev/null || { echo "FATAL: juju not on PATH" >&2; exit 1; }
command -v maas >/dev/null || { echo "FATAL: maas not on PATH" >&2; exit 1; }
# maas_json ARGS... -- run `maas $MAAS_PROFILE ARGS...` and print its output
# when that output is non-empty, valid JSON.
# Best-effort by design: in every other case (command failed, printed nothing,
# or printed something jq cannot parse) it prints "[]" so callers always get
# parseable JSON, and it writes a WARN line to stderr so a broken MAAS query is
# not mistaken for "no machines". Always returns 0.
maas_json() {
  local out rc=0
  # maas's own stderr is discarded; only the exit status is kept for the warning.
  out="$(maas "$MAAS_PROFILE" "$@" 2>/dev/null)" || rc=$?
  if [ -n "$out" ] && printf '%s' "$out" | jq empty 2>/dev/null; then
    printf '%s' "$out"
    return 0
  fi
  echo "WARN: 'maas $MAAS_PROFILE $*' returned no usable JSON (exit $rc) -- treating as empty machine list" >&2
  printf '[]'
}

# Snapshot of all MAAS machines; sid_of/status_of read this variable.
MACHINES_JSON="$(maas_json machines read)"
# _maas_field HOSTNAME FIELD -- print FIELD of the first MACHINES_JSON entry
# whose .hostname equals HOSTNAME; prints nothing when no entry matches.
_maas_field() {
  jq -r --arg h "$1" --arg f "$2" '.[] | select(.hostname == $h) | .[$f]' <<<"$MACHINES_JSON" \
    | head -1
}

# sid_of HOSTNAME -- MAAS system_id recorded for HOSTNAME in MACHINES_JSON.
sid_of() {
  _maas_field "$1" system_id
}

# status_of HOSTNAME -- MAAS status_name recorded for HOSTNAME in MACHINES_JSON.
status_of() {
  _maas_field "$1" status_name
}
# juju_mid_of HOSTNAME -- print the juju machine-id in $MODEL that corresponds
# to the MAAS machine HOSTNAME; prints nothing when no machine matches.
#
# A top-level machine matches when any of these hold:
#   - its "instance-id" equals HOSTNAME (the assumption this script started with)
#   - its "display-name" equals HOSTNAME
#   - its "instance-id" equals the MAAS system_id that sid_of gives for HOSTNAME
# NOTE(review): Juju's MAAS provider is believed to report the MAAS system_id as
# "instance-id" and the hostname as "display-name" in JSON output -- confirm
# with `juju machines --format=json` on this controller.
#
# Returns 0 with no output when $MODEL does not exist (a supported state: the
# caller then skips remove/destroy). Returns 1 with a FATAL line on stderr when
# the model exists but its machines cannot be listed or parsed. Treating that as
# "no match" would skip remove-machine for the host and leave it attached when
# the model is destroyed.
juju_mid_of() {
  local host="$1" doc sid ids
  if ! doc="$(juju machines -m "$MODEL" --format=json 2>/dev/null)"; then
    # Listing failed: find out whether the model is simply absent.
    if juju models --format=json 2>/dev/null \
        | jq -e --arg m "$MODEL" '.models[]?|select(.name==$m or (.name|endswith("/"+$m)))' >/dev/null 2>&1; then
      echo "FATAL: 'juju machines -m $MODEL' failed although the model exists" >&2
      return 1
    fi
    return 0
  fi
  [ -n "$doc" ] || return 0
  # System-id lookup is best-effort; an empty value disables that match arm.
  sid="$(sid_of "$host" 2>/dev/null)" || sid=""
  ids="$(printf '%s' "$doc" | jq -r --arg h "$host" --arg s "$sid" '
      (.machines // {}) | to_entries[]
      | select(
          (.value."instance-id" == $h)
          or (.value."display-name" == $h)
          or ($s != "" and .value."instance-id" == $s)
        )
      | .key')" || {
    echo "FATAL: cannot parse 'juju machines -m $MODEL' output" >&2
    return 1
  }
  # Only the first matching machine-id is reported.
  [ -z "$ids" ] || printf '%s\n' "${ids%%$'\n'*}"
}
hdr "release-teardown audit mode=$MODE canary=$CANARY model=$MODEL"

# --- protected substrate (must never be a target) ---
# PROT_SID maps MAAS system_id -> hostname for every protected machine found,
# so later target resolution can reject any system_id recorded here.
declare -A PROT_SID
hdr "PROTECTED substrate (never touched)"
for prot in "${PROTECTED[@]}"; do
  prot_sid="$(sid_of "$prot")"
  if [ -n "$prot_sid" ]; then
    PROT_SID["$prot_sid"]="$prot"
    note "$prot = $prot_sid (status $(status_of "$prot")) -- PROTECTED"
  else
    note "$prot: not in MAAS -- nothing to protect"
  fi
done
# --- hosts (kept) ---
hdr "HOSTS (kept in MAAS via --keep-instance)"
# HMID maps hostname -> juju machine-id ("" when the host is not in the model).
declare -A HMID
# Canary runs operate on openstack0 alone; otherwise the whole roster is used.
if [ "$CANARY" -eq 1 ]; then
  HOSTS_EFFECTIVE=(openstack0)
else
  HOSTS_EFFECTIVE=("${HOSTS[@]}")
fi
for host in "${HOSTS_EFFECTIVE[@]}"; do
  host_sid="$(sid_of "$host")"
  if [ -z "$host_sid" ]; then
    fail "$host: not in MAAS -- roster mismatch"
  elif [ -n "${PROT_SID[$host_sid]:-}" ]; then
    # A target that resolves to a protected system_id is a safety failure.
    fail "$host resolves to PROTECTED sid $host_sid -- ABORT"
  else
    machine_id="$(juju_mid_of "$host")"
    if [ -z "$machine_id" ]; then
      note "$host = $host_sid (status $(status_of "$host")) -- NOT in juju model (already detached?); will skip remove"
      HMID["$host"]=""
    else
      HMID["$host"]="$machine_id"
      note "$host = $host_sid juju-machine $machine_id (status $(status_of "$host"))"
    fi
  fi
done
# --- orphans (deleted; absent ok) ---
# OSID maps MAAS system_id -> hostname for each orphan that will be deleted.
declare -A OSID
hdr "ORPHANS (deleted from MAAS)"
for orphan in "${ORPHANS[@]}"; do
  orphan_sid="$(sid_of "$orphan")"
  if [ -z "$orphan_sid" ]; then
    note "$orphan: absent -- SKIP"
  elif [ -n "${PROT_SID[$orphan_sid]:-}" ]; then
    # Refuse to delete anything that resolves to a protected system_id.
    fail "$orphan resolves to PROTECTED sid $orphan_sid -- ABORT"
  else
    OSID["$orphan_sid"]="$orphan"
    note "$orphan = $orphan_sid -- DELETE"
  fi
done
# --- model presence ---
# The model counts as present when `juju models` lists a name equal to $MODEL
# or ending in "/$MODEL". Any failure of the query counts as "not present".
MODEL_PRESENT=0
model_filter='.models[]?|select(.name==$m or (.name|endswith("/"+$m)))'
if juju models --format=json 2>/dev/null \
    | jq -e --arg m "$MODEL" "$model_filter" >/dev/null 2>&1; then
  MODEL_PRESENT=1
  note "juju model '$MODEL' PRESENT"
else
  note "juju model '$MODEL' not present -- remove/destroy skipped"
fi

# Any safety failure recorded during the audit stops the run before the plan.
if [ "$FATAL" -ne 0 ]; then
  echo
  echo "ABORT: $FATAL safety failure(s) -- nothing changed"
  exit 1
fi
# ---- plan (printed in every mode; a dry run exits 0 right after it) ----
hdr "PLAN"
echo " 1) per host: juju remove-machine <id> --keep-instance --force --no-wait"
echo " (detaches from model; MAAS instance + carve PRESERVED)"
echo " 2) HARD-VERIFY each host still present in MAAS (FAIL LOUD if decomposed)"
if [ "$CANARY" -eq 1 ]; then
  echo " -- CANARY: openstack0 ONLY, then STOP (no destroy-model, no orphan delete) --"
else
  # Step 3 lists the exact flags the apply path passes to destroy-model, so the
  # dry-run audit does not under-report --force / --no-wait.
  echo " 3) juju destroy-model $MODEL --release-storage --force --no-wait --no-prompt (machines already detached)"
  echo " 4) delete orphan MAAS machine(s): ${ORPHANS[*]}"
fi
echo " PROTECTED: ${PROTECTED[*]}"
if [ "$MODE" = dryrun ]; then
  echo
  echo " re-run with --apply (and --canary for the single-host first run)."
  echo "OK (dryrun)"
  exit 0
fi
# ---- typed-approval gate ----
# Ask for typed confirmation before ANY mutation. Something will be mutated when
# the model is present (remove-machine / destroy-model), or when this is a
# non-canary run with at least one orphan resolved for deletion. The second case
# matters when the model is absent: orphan machines are still deleted from MAAS.
will_mutate=0
if [ "$MODEL_PRESENT" = 1 ]; then
  will_mutate=1
elif [ "$CANARY" -eq 0 ] && [ "${#OSID[@]}" -gt 0 ]; then
  will_mutate=1
fi
if [ "$PROMPT" -eq 1 ] && [ "$will_mutate" -eq 1 ]; then
  printf 'Type the model name "%s" to confirm machine-preserving teardown: ' "$MODEL" > /dev/tty
  ans=""
  # EOF or a read error leaves ans unmatched, so the run ends as an operator
  # abort (exit 2) instead of dying through errexit.
  read -r ans < /dev/tty || true
  [ "$ans" = "$MODEL" ] || { echo "aborted (got '$ans') -- nothing changed"; exit 2; }
fi
# ---- MUTATE 1: remove-machine --keep-instance per host, verify survival ----
# Detach every effective host from the juju model, then confirm each one is
# still listed by MAAS. Nothing in this section runs unless MODEL_PRESENT is 1.
hdr "MUTATE 1: remove-machine --keep-instance (per host) + survival verify"
if [ "$MODEL_PRESENT" = 1 ]; then
# Pass 1: issue the removals. Hosts with no recorded juju machine-id are skipped.
for h in "${HOSTS_EFFECTIVE[@]}"; do
mid="${HMID[$h]:-}"
if [ -z "$mid" ]; then note "$h: no juju machine-id -- skip remove"; continue; fi
echo " DO: juju remove-machine $mid --keep-instance --force --no-wait ($h)"
# A non-zero exit is recorded via fail(); the loop still moves on to the next host.
if ! juju remove-machine "$mid" -m "$MODEL" --keep-instance --force --no-wait 2>&1; then
fail "remove-machine $mid ($h) returned error"; continue
fi
done
# Fixed pause before re-reading MAAS (remove-machine was issued with --no-wait).
echo " ...waiting 30s for MAAS to settle, then verifying survival"
sleep 30
# Refresh the snapshot so sid_of/status_of see the post-removal state.
MACHINES_JSON="$(maas_json machines read)"
# Pass 2: a host "survived" when its hostname still resolves to a system_id.
# Every effective host is checked, including those skipped in pass 1.
for h in "${HOSTS_EFFECTIVE[@]}"; do
s="$(sid_of "$h")"
if [ -z "$s" ]; then
fail "$h DECOMPOSED -- gone from MAAS after remove-machine --keep-instance (the documented behavior did NOT hold on this MAAS; STOP and investigate before continuing)"
else
note "$h survived in MAAS = $s (status $(status_of "$h")) -- GOOD"
fi
done
else note "model absent -- skip remove"; fi
# Any failure recorded so far stops the run with exit 1, before destroy-model
# and orphan deletion.
[ "$FATAL" -eq 0 ] || { echo; echo "STOP: a host decomposed or remove failed -- NOT destroying model / deleting orphans. Investigate."; exit 1; }
# Canary runs end here with exit 0: no destroy-model and no orphan deletion.
if [ "$CANARY" -eq 1 ]; then
  printf '%s\n' \
    "" \
    "CANARY OK: openstack0 detached from model and SURVIVED in MAAS." \
    " --keep-instance is validated on this MAAS. Re-run WITHOUT --canary for all four + destroy-model." \
    "OK (canary)"
  exit 0
fi
# ---- MUTATE 2: destroy model (machines already detached -> nothing to decompose) ----
hdr "MUTATE 2: destroy model (machines detached; --release-storage)"
if [ "$MODEL_PRESENT" = 1 ]; then
  # Build the command once so the logged line and the executed call cannot drift.
  destroy_cmd=(juju destroy-model "$MODEL" --release-storage --force --no-wait --no-prompt)
  echo " DO: ${destroy_cmd[*]}"
  # An error is recorded via fail(); the script carries on to the next section.
  if ! "${destroy_cmd[@]}" 2>&1; then
    fail "destroy-model returned error"
  fi
else
  note "model absent -- skip"
fi
# ---- MUTATE 3: delete orphans ----
# Best-effort: a failed delete is only noted and does not count as a failure.
hdr "MUTATE 3: delete orphan machines"
for orphan_sid in "${!OSID[@]}"; do
  orphan_name="${OSID[$orphan_sid]}"
  echo " DO: delete orphan $orphan_name ($orphan_sid)"
  if ! maas "$MAAS_PROFILE" machine delete "$orphan_sid" >/dev/null 2>&1; then
    note "orphan $orphan_name delete failed (may already be gone)"
  fi
done
# ---- VERIFY ----
# Re-read MAAS and report the state of every roster host and protected machine.
# A host with no status in the fresh snapshot is recorded as a failure, and any
# failure recorded during apply (including a destroy-model error) ends the run
# with exit 1 instead of "OK (apply)".
hdr "VERIFY (read-only): hosts present + substrate intact"
MACHINES_JSON="$(maas_json machines read)"
for h in "${HOSTS[@]}"; do
  st="$(status_of "$h")"
  if [ -n "$st" ]; then
    note "$h -> $st (kept)"
  else
    fail "$h -> ABSENT (unexpected for the keep path)"
  fi
done
for p in "${PROTECTED[@]}"; do
  note "PROTECTED $p -> $(status_of "$p") (unchanged)"
done
if [ "$FATAL" -ne 0 ]; then
  echo
  echo "FAILED: $FATAL failure(s) during apply (destroy-model error and/or host ABSENT from MAAS) -- investigate."
  exit 1
fi
echo; echo "next: hosts are KEPT (Deployed) -- a redeploy reuses them in place (no reenroll/recarve)."
echo " if you intend a fresh deploy onto these same carved hosts: juju add-model $MODEL -> deploy."
echo "OK (apply)"