diff --git a/docs/phase-00-maas-standup-notes.md b/docs/phase-00-maas-standup-notes.md new file mode 100644 index 0000000..102b80e --- /dev/null +++ b/docs/phase-00-maas-standup-notes.md @@ -0,0 +1,52 @@ +# phase-00 MAAS stand-up (D-058) -- notes + +`scripts/phase-00-maas-standup.sh` brings MAAS to the D-058 plane topology +idempotently. **Dry-run is the audit** (default): it resolves live ids BY +CIDR/name (PATTERN-1) and prints the plan, changing nothing. `--apply` executes. + +## Behavior per resource +- present and correct -> **SKIP** +- absent -> **CREATE** (fabric / VLAN / subnet / space / gateway / managed / dns / API-VIP reserve) +- present but bound to the wrong plane or wrong VID -> **DRIFT** (reported, never touched) + +A re-CIDR is destructive (MAAS cannot change a subnet CIDR in place), so it is +**out of scope by design**: the drift scan reports it as MIGRATE-NEEDED and the +script refuses to build onto a CIDR the wrong plane occupies. Verified idempotent +on `--apply` (zero mutations against an already-correct cloud). + +## D-058 target (what it stands up) +provider-public 10.12.4.0/22 (untagged, gw .4.1) | provider-vip 10.12.8.0/22 +(VID 104 on the provider fabric, gw .8.1, VIP band .8.2-.100) | metal-admin +10.12.12.0/22 (untagged, gw .12.1, VIP band .12.2-.100) | metal-internal +10.12.16.0/22 (VID 103 on the metal fabric, VIP band .16.2-.100) | data-tenant +10.12.20.0/22 | storage 10.12.32.0/22 | replication 10.12.36.0/22. Untagged base +planes are created first so their tagged siblings can ride the same fabric +(provider-public->provider-vip, metal-admin->metal-internal). + +## Scope boundary +- THIS script owns topology + the three API-VIP reserve bands (the bundle deploy + depends on those reserves existing). +- `phase-00-maas-carve.sh` keeps the FIP pool, mgmt reserves, and stale-range + cleanup. (Its D-058 CIDR update is the next pass.) +- It never deletes anything. 
+ +## Relationship to provider-vip-standup.sh +This generalizes that script from one plane to the whole topology; provider-vip is +now just one row of the table. `provider-vip-standup.sh` remains the targeted +single-plane tool (add provider-vip to an already-D-058 cloud). Both source +`lib-net.sh`; no conflict. + +## Tests +`tests/phase-00-maas-standup/` -- fake `maas` + real jq, fixtures generated by +`make_fixtures.py`. Four scenarios, ALL PASS: fresh MAAS (full create plan), +D-058 done (all SKIP / zero WOULD), D-052 current (the three migrating planes +drift + refuse), wrong-VID (vid drift). Run: `bash tests/phase-00-maas-standup/run-tests.sh`. + +## The current-cloud gap (next deliverable) +The live cloud is D-052/053, so this stand-up will report metal-admin (.8), +metal-internal (.12), data-tenant (.16) as DRIFT -- those CIDRs are reassigned by +D-058. The destructive cutover (release/teardown so subnets have no links, delete +the old subnets in collision-safe order, then `--apply` to build the new scheme) +is a **separate gated step**, not this script. Sequence: teardown -> delete old +subnets -> `phase-00-maas-standup.sh --apply` -> `phase-00-maas-carve.sh` +(D-058) -> jumphost bridge re-IP (D-058 ordering trap) -> deploy. diff --git a/scripts/phase-00-maas-standup.sh b/scripts/phase-00-maas-standup.sh new file mode 100644 index 0000000..76b8b62 --- /dev/null +++ b/scripts/phase-00-maas-standup.sh @@ -0,0 +1,227 @@ +#!/usr/bin/env bash +# scripts/phase-00-maas-standup.sh [--apply] +# +# MAAS topology stand-up for the v1 (VR0 / Baldurkeep) plane scheme (D-058). +# Idempotently brings MAAS to the target fabric/VLAN/subnet/space layout so the +# carve + bundle can resolve every plane. Useful BOTH for a fresh test cloud +# (everything absent -> full create plan) and for an existing cloud (present-and- +# correct -> SKIP; present-but-wrong -> reported as DRIFT, never silently changed). 
+# +# Default is DRY-RUN (the audit): resolves every id live BY CIDR/NAME (PATTERN-1, +# no hardcoded MAAS ids) and prints each mutation it WOULD run, changing nothing. +# Pass --apply to execute. Re-runnable; anything already correct is SKIPped. +# +# SAFETY: this script NEVER deletes. A re-CIDR (a subnet present at a CIDR that +# D-058 reassigns to a different plane -- e.g. the live D-052/053 cloud where +# 10.12.8/22 is metal-admin but D-058 wants it for provider-vip) is DESTRUCTIVE +# (MAAS cannot change a subnet's CIDR in place) and is therefore OUT OF SCOPE: +# it is reported in the DRIFT section as MIGRATE-NEEDED and gated to a separate +# human teardown step. This script will refuse to create a target subnet whose +# CIDR is occupied by the wrong plane. +# +# Scope boundary vs phase-00-maas-carve.sh: THIS script owns topology (fabric/ +# VLAN/subnet/space/gateway/managed/dns) + the per-plane API-VIP reserve bands +# (.2-.100 on the three VIP-bearing planes), which the bundle deploy depends on. +# The FIP pool, mgmt reserves, and stale-range cleanup stay in phase-00-maas-carve. +# +# Order matters (MAAS semantics + fresh-fabric bootstrap): untagged base planes +# first (each owns a fabric), then their tagged siblings ride that fabric, so a +# fresh MAAS bootstraps provider-public -> provider-vip, metal-admin -> metal-internal. +# +# Exit: 0 ok (no drift) | 1 fatal or unresolved drift | 2 precondition +# CLI forms verified against Canonical MAAS how-to-manage-networks. +# ASCII + LF. + +set -euo pipefail +shopt -s inherit_errexit 2>/dev/null || true + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=scripts/lib-net.sh +. 
"$SCRIPT_DIR/lib-net.sh" + +MAAS_PROFILE="${MAAS_PROFILE:-admin}" +MODE="dryrun"; [ "${1:-}" = "--apply" ] && MODE="apply" +FATAL=0; DRIFT=0 + +hdr() { echo; echo "=== $* ==="; } +note() { echo " - $*"; } +fail() { echo "FAIL: $*" >&2; FATAL=$((FATAL+1)); } +need_jq || exit 1 +maas_q() { maas "$MAAS_PROFILE" "$@"; } + +emit() { # + local desc="$1"; shift + if [ "$MODE" = "apply" ]; then + echo " DO: $desc" + local out + if ! out="$(maas "$MAAS_PROFILE" "$@" 2>&1)"; then + fail "$desc" + echo " MAAS said: $(printf '%s' "$out" | grep -viE '^(Success|Machine-readable)' | head -3 | tr '\n' ' ')" >&2 + return 1 + fi + else + echo " WOULD: $desc" + echo " maas $MAAS_PROFILE $*" + fi +} + +# --- PATTERN-1 resolve-by-CIDR/name helpers (no hardcoded ids) --------------- +sub_id() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|.id' | head -1; } +sub_vid() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.vlan.vid|tostring)' | head -1; } +sub_fabid() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.vlan.fabric_id|tostring)' | head -1; } +sub_space() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.space // "")' | head -1; } +sub_field() { maas_q subnets read | jq -r --arg c "$1" --arg f "$2" '.[]|select(.cidr==$c)|(.[$f] // "")' | head -1; } +sub_mtu() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.vlan.mtu|tostring)' | head -1; } +sub_dns() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.dns_servers // []|join(","))' | head -1; } +space_id() { maas_q spaces read | jq -r --arg n "$1" '.[]|select(.name==$n)|(.id|tostring)' | head -1; } +fab_byname() { maas_q fabrics read | jq -r --arg n "$1" '.[]|select(.name==$n)|(.id|tostring)' | head -1; } +vlanobj() { maas_q vlans read "$1" | jq -r --arg v "$2" '.[]|select((.vid|tostring)==$v)|(.id|tostring)' | head -1; } +vlanspace() { maas_q vlans read "$1" | jq -r --arg v "$2" '.[]|select((.vid|tostring)==$v)|(.space // 
"")' | head -1; } +vlan0obj() { vlanobj "$1" 0; } # the untagged (vid 0) default VLAN of a fabric + +# --- target plane table (D-058): name|cidr|kind|vid|parent_cidr|gw|viplo|viphi|dnssrc +# kind=untagged owns a fabric; kind=tagged rides parent_cidr's fabric on . +# "-" = none. dnssrc = a CIDR whose dns_servers to mirror, or "-". +PLANES="$(cat < nothing to drift + if [ "$curspace" != "$name" ]; then + note "DRIFT: $cidr is space '$curspace' but D-058 assigns it to '$name' -- MIGRATE (delete+recreate; gated, NOT done here)" + WRONG_CIDR["$cidr"]=1; DRIFT=$((DRIFT+1)); continue + fi + if [ "$kind" = tagged ]; then + cv="$(sub_vid "$cidr")" + if [ "$cv" != "$vid" ]; then + note "DRIFT: $cidr space ok ('$name') but VID '$cv' != target $vid -- MIGRATE (gated)" + WRONG_CIDR["$cidr"]=1; DRIFT=$((DRIFT+1)) + fi + fi +done <<< "$PLANES" +[ "$DRIFT" -eq 0 ] && note "no drift: no target CIDR is occupied by the wrong plane" + +# ------------------------------------------------------------- per-plane standup +while IFS='|' read -r name cidr kind vid parent gw viplo viphi dnssrc; do + [ -n "$name" ] || continue + gw="$(dt "$gw")"; viplo="$(dt "$viplo")"; viphi="$(dt "$viphi")"; dnssrc="$(dt "$dnssrc")" + hdr "plane $name ($cidr, $kind${vid:+ }$( [ "$kind" = tagged ] && echo "VID $vid" ))" + + # refuse to build onto a CIDR the drift scan flagged as the wrong plane + if [ -n "${WRONG_CIDR[$cidr]:-}" ]; then + fail "$cidr occupied by the wrong plane (see DRIFT) -- resolve the migration first; skipping $name" + continue + fi + + # ---- resolve the fabric this plane lives on ---- + fab="" + if [ -n "$(sub_id "$cidr")" ]; then + fab="$(sub_fabid "$cidr")" + elif [ "$kind" = tagged ]; then + fab="$(sub_fabid "$parent")" + if [ -z "$fab" ]; then + [ "$MODE" = apply ] && { fail "$name: parent subnet $parent absent -- create it first"; continue; } + fab="" # dry-run: parent is planned above; show the plan + fi + else + fab="$(fab_byname "$name")" + if [ -z "$fab" ]; then + emit "create 
fabric $name" fabrics create name="$name" + fab="$(fab_byname "$name")" # re-resolve (apply: now exists) + [ -n "$fab" ] || fab="" # dry-run placeholder + fi + fi + note "fabric = $fab" + + # ---- space ---- + if [ -z "$(space_id "$name")" ]; then + emit "create space $name" spaces create name="$name" + else note "space $name exists -- SKIP"; fi + sid="$(space_id "$name")"; [ -n "$sid" ] || sid="" + + # ---- VLAN + the vlan-obj the subnet will ride ---- + if [ "$kind" = tagged ]; then + if [ -z "$(vlanobj "$fab" "$vid")" ]; then + mtu="$(sub_mtu "$parent")"; { [ -n "$mtu" ] && [ "$mtu" != null ]; } || mtu="1500" + emit "create VLAN vid=$vid name=$name mtu=$mtu on fabric $fab" \ + vlans create "$fab" name="$name" vid="$vid" mtu="$mtu" + else note "VID $vid on fabric $fab exists -- SKIP"; fi + if [ "$(vlanspace "$fab" "$vid")" != "$name" ]; then + emit "assign fabric $fab vid $vid -> space $name (id $sid)" vlan update "$fab" "$vid" space="$sid" + else note "VID $vid already in space $name -- SKIP"; fi + vobj="$(vlanobj "$fab" "$vid")"; [ -n "$vobj" ] || vobj="" + else + # untagged: rides the fabric default (vid 0); assign that vid-0 VLAN to the space + if [ "$(vlanspace "$fab" 0)" != "$name" ]; then + emit "assign fabric $fab untagged(vid 0) -> space $name (id $sid)" vlan update "$fab" 0 space="$sid" + else note "untagged VLAN on fabric $fab already in space $name -- SKIP"; fi + vobj="$(vlan0obj "$fab")"; [ -n "$vobj" ] || vobj="" + fi + + # ---- subnet (guard wrong-VID if present) ---- + if [ -z "$(sub_id "$cidr")" ]; then + emit "create subnet $cidr vlan=$vobj" subnets create cidr="$cidr" vlan="$vobj" + else + if [ "$kind" = tagged ]; then + got="$(sub_vid "$cidr")" + [ "$got" = "$vid" ] || { fail "subnet $cidr exists on VID '$got', expected $vid -- refusing"; continue; } + fi + note "subnet $cidr exists -- SKIP create" + fi + + # ---- gateway / managed / dns ---- + if [ -n "$gw" ]; then + if [ "$(sub_field "$cidr" gateway_ip)" != "$gw" ]; then + emit "subnet $cidr 
-> gateway_ip=$gw" subnet update "$cidr" gateway_ip="$gw" + else note "gateway_ip already $gw -- SKIP"; fi + fi + if [ "$(sub_field "$cidr" managed)" != "true" ]; then + emit "subnet $cidr -> managed=true" subnet update "$cidr" managed=true + else note "subnet $cidr already managed -- SKIP"; fi + if [ -n "$dnssrc" ]; then + dns="$(sub_dns "$dnssrc")" + if [ -n "$dns" ] && [ "$dns" != null ]; then + if [ "$(sub_dns "$cidr")" != "$dns" ]; then + emit "subnet $cidr -> dns_servers=$dns (mirrors $dnssrc)" subnet update "$cidr" dns_servers="$dns" + else note "dns_servers already $dns -- SKIP"; fi + else note "dns source $dnssrc has no dns_servers -- leaving $cidr dns unset"; fi + fi + + # ---- reserved API-VIP band ---- + if [ -n "$viplo" ]; then + if maas_q ipranges read | jq -e --arg lo "$viplo" '.[]|select(.start_ip==$lo)' >/dev/null 2>&1; then + note "reserved range starting $viplo exists -- SKIP" + else + rsid="$(sub_id "$cidr")"; [ -n "$rsid" ] || rsid="" + emit "create reserved API-VIP band $viplo-$viphi on subnet $rsid" \ + ipranges create type=reserved subnet="$rsid" start_ip="$viplo" end_ip="$viphi" \ + comment="$name API HA VIP band (D-058)" + fi + fi +done <<< "$PLANES" + +# ----------------------------------------------------------------------- result +hdr "result" +[ "$DRIFT" -eq 0 ] || echo " $DRIFT plane(s) need a gated re-CIDR/migration before they can be stood up (see DRIFT)." +if [ "$FATAL" -ne 0 ]; then echo " completed with $FATAL failure(s)"; exit 1; fi +if [ "$DRIFT" -ne 0 ]; then echo " OK ($MODE) -- but exit 1 due to unresolved drift"; exit 1; fi +echo " OK ($MODE) -- topology consistent with D-058" diff --git a/tests/phase-00-maas-standup/fakebin/maas b/tests/phase-00-maas-standup/fakebin/maas new file mode 100644 index 0000000..1180fc2 --- /dev/null +++ b/tests/phase-00-maas-standup/fakebin/maas @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# fake maas: serves read endpoints from fixtures. 
`vlans read FABRIC_ID` is filtered +# by fabric_id to mimic real MAAS (which scopes vlans to the requested fabric). +prof="${1:-}"; obj="${2:-}"; act="${3:-}"; fab="${4:-}" +case "$obj $act" in + "subnets read") cat "${FIX_SUBNETS:?}"; exit 0 ;; + "spaces read") cat "${FIX_SPACES:?}"; exit 0 ;; + "ipranges read") cat "${FIX_IPRANGES:?}"; exit 0 ;; + "fabrics read") cat "${FIX_FABRICS:?}"; exit 0 ;; + "vlans read") jq --arg f "$fab" '[.[]|select((.fabric_id|tostring)==$f)]' "${FIX_VLANS:?}"; exit 0 ;; +esac +echo "{}"; exit 0 diff --git a/tests/phase-00-maas-standup/make_fixtures.py b/tests/phase-00-maas-standup/make_fixtures.py new file mode 100644 index 0000000..8d023cc --- /dev/null +++ b/tests/phase-00-maas-standup/make_fixtures.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# tests/phase-00-maas-standup/make_fixtures.py +# Emits fix/SCENARIO_{subnets,spaces,vlans,ipranges,fabrics}.json for the +# phase-00-maas-standup.sh behavior harness. ASCII + LF. +import json, os + +HERE = os.path.dirname(os.path.abspath(__file__)) +FIX = os.path.join(HERE, "fix") +os.makedirs(FIX, exist_ok=True) + + +def sub(cidr, sid, space, vid, fab, mtu=1500, gw=None, managed=True, dns=None): + return {"cidr": cidr, "id": sid, "space": space, + "vlan": {"vid": vid, "fabric_id": fab, "mtu": mtu}, + "gateway_ip": gw, "managed": managed, "dns_servers": dns or []} + + +def vlan(vid, vid_id, fab, space, mtu=1500): + return {"vid": vid, "id": vid_id, "fabric_id": fab, "space": space, "mtu": mtu} + + +def dump(scn, subnets, spaces, vlans, ipranges, fabrics): + for name, obj in (("subnets", subnets), ("spaces", spaces), ("vlans", vlans), + ("ipranges", ipranges), ("fabrics", fabrics)): + with open(os.path.join(FIX, f"{scn}_{name}.json"), "w") as f: + json.dump(obj, f, indent=2) + f.write("\n") + + +def spaces_list(names): + return [{"name": n, "id": i + 1} for i, n in enumerate(names)] + + +def fabrics_list(pairs): # [(name,id)] + return [{"name": n, "id": i} for n, i in pairs] + + +# ---- FRESH: nothing exists 
-> full create plan ---- +dump("fresh", [], [], [], [], []) + +# ---- DONE: D-058 fully present + correct -> all SKIP, zero WOULD ---- +fabs = fabrics_list([("provider", 1), ("metal", 2), ("data", 3), ("storage", 4), ("replication", 5)]) +vl = [ + vlan(0, 10, 1, "provider-public"), vlan(104, 11, 1, "provider-vip"), + vlan(0, 20, 2, "metal-admin"), vlan(103, 21, 2, "metal-internal", mtu=9000), + vlan(0, 30, 3, "data-tenant"), vlan(0, 40, 4, "storage"), vlan(0, 50, 5, "replication"), +] +subs = [ + sub("10.12.4.0/22", 1, "provider-public", 0, 1, gw="10.12.4.1"), + sub("10.12.8.0/22", 2, "provider-vip", 104, 1, gw="10.12.8.1"), # dns mirrors metal-internal (which is unset) -> left unset + sub("10.12.12.0/22", 3, "metal-admin", 0, 2, gw="10.12.12.1"), + sub("10.12.16.0/22", 4, "metal-internal", 103, 2, mtu=9000), + sub("10.12.20.0/22", 5, "data-tenant", 0, 3), + sub("10.12.32.0/22", 6, "storage", 0, 4), + sub("10.12.36.0/22", 7, "replication", 0, 5), +] +spc = spaces_list(["provider-public", "provider-vip", "metal-admin", "metal-internal", + "data-tenant", "storage", "replication"]) +ipr = [{"type": "reserved", "start_ip": lo, "end_ip": hi, "subnet": {"id": sid}} + for lo, hi, sid in [("10.12.8.2", "10.12.8.100", 2), + ("10.12.12.2", "10.12.12.100", 3), + ("10.12.16.2", "10.12.16.100", 4)]] +dump("done", subs, spc, vl, ipr, fabs) + +# ---- D-052 CURRENT (old live scheme): the three migrating planes drift ---- +fabs_c = fabrics_list([("1_provider", 1), ("2_metal", 2), ("4_data", 3), + ("8_storage", 4), ("9_replication", 5)]) +vl_c = [ + vlan(0, 10, 1, "provider-public"), + vlan(0, 20, 2, "metal-admin"), vlan(103, 21, 2, "metal-internal", mtu=9000), + vlan(0, 30, 3, "data-tenant"), vlan(0, 40, 4, "storage"), vlan(0, 50, 5, "replication"), +] +subs_c = [ + sub("10.12.4.0/22", 1, "provider-public", 0, 1, gw="10.12.4.1"), # correct under D-058 -> SKIP + sub("10.12.8.0/22", 2, "metal-admin", 0, 2, gw="10.12.8.1"), # D-058 wants provider-vip -> DRIFT + sub("10.12.12.0/22", 3, 
"metal-internal", 103, 2, mtu=9000), # D-058 wants metal-admin -> DRIFT + sub("10.12.16.0/22", 4, "data-tenant", 0, 3), # D-058 wants metal-internal -> DRIFT + sub("10.12.32.0/22", 6, "storage", 0, 4), # correct -> SKIP + sub("10.12.36.0/22", 7, "replication", 0, 5), # correct -> SKIP +] +spc_c = spaces_list(["provider-public", "metal-admin", "metal-internal", + "data-tenant", "storage", "replication"]) +dump("d052", subs_c, spc_c, vl_c, [], fabs_c) + +# ---- WRONG-VID: provider-vip subnet present at .8 but on VID 99 ---- +vl_w = [v for v in vl if v["vid"] != 104] + [vlan(99, 11, 1, "provider-vip")] +subs_w = [s for s in subs if s["cidr"] != "10.12.8.0/22"] + \ + [sub("10.12.8.0/22", 2, "provider-vip", 99, 1, gw="10.12.8.1")] +dump("wrongvid", subs_w, spc, vl_w, ipr, fabs) + +print("fixtures written to", FIX) diff --git a/tests/phase-00-maas-standup/run-tests.sh b/tests/phase-00-maas-standup/run-tests.sh new file mode 100644 index 0000000..438ab6f --- /dev/null +++ b/tests/phase-00-maas-standup/run-tests.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# Behavior regression for phase-00-maas-standup.sh (D-058). Fake `maas` + real jq. +# Drives DRY-RUN and asserts WOULD/SKIP/DRIFT/refuse behaviour across scenarios. +set -uo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCRIPT="$(cd "$HERE/../../scripts" && pwd)/phase-00-maas-standup.sh" +BIN="$HERE/fakebin"; FIX="$HERE/fix" +chmod +x "$BIN"/* 2>/dev/null || true # GitHub Desktop lands files mode 100644 +command -v jq >/dev/null || { echo "FAIL: jq required"; exit 1; } +python3 "$HERE/make_fixtures.py" >/dev/null +rc_all=0; OUT="$(mktemp)" + +run() { #