diff --git a/docs/v1-redeploy-changelog.md b/docs/v1-redeploy-changelog.md index aba5fcc..d75e50e 100644 --- a/docs/v1-redeploy-changelog.md +++ b/docs/v1-redeploy-changelog.md @@ -630,5 +630,33 @@ root-fix (DNS+FQDN certs)" subsection so a future operator does not apply it on the testcloud; fix the verify command https->http. Cross-ref D-044. Also fix the stale "Main LXD UI" vhost comment. +### Phase-04 prep -- network-carve verify deliverable + DOCFIX-047 (2026-06-27, pre-execution) +New read-only deliverable staged ahead of running phase-04 (network carve): + scripts/phase-04-network-verify.sh -- verify-before-mutate + EXIT-GATE check for the + Neutron external provider network. PRE gate: discovers the MAAS provider subnet BY CIDR + (10.12.4.0/22) -- lib-net PATTERN-1, never a hardcoded subnet id -- asserts its gateway == + pinned PLANE_GW (10.12.4.1) and that the FIP pool 10.12.5.0-10.12.7.254 is a RESERVED + iprange on it (KI-P3-001). POST gate (auto-detected if provider-ext exists): external/flat/ + physnet1/NOT-shared + subnet cidr/gateway/no-dhcp/FIP-pool. Sources lib-net.sh + need_jq; + requires admin-openrc sourced + the 'admin' MAAS profile; never calls 'maas list' (DOCFIX-016). + Exit 0 PROCEED|PASS / 1 HOLD|FAIL / 2 precondition. Mutates nothing. + tests/phase-04/ -- offline regression (real jq + fake maas/openstack data shims; no live + infra). 7/7 green: PRE PROCEED (net absent); POST PASS for BOTH allocation_pools shapes + (list-of-objects AND list-of-strings -- tolerance proven, not assumed, so the live client's + shape cannot silently break the gate); and four failure variants (FIP pool not reserved; + wrong gateway; provider subnet absent-by-CIDR; provider-ext shared=true). bash -n clean; + shellcheck 0.9.0 clean (no warnings) on script + harness + shims; ASCII + 0 CR on all five. + NOTE: fixtures put the provider subnet at id=7 (NOT 1) on purpose, to prove CIDR discovery is + id-independent (the exact failure mode DOCFIX-047 guards against). 
+ +DOCFIX-047 -- phase-04 do-doc hardcodes the provider MAAS subnet id (violates PATTERN-1). + runbooks/phase-04-network-carve.md reads the provider gateway via `maas admin subnet read 1` + and its CHECK prose says "subnet id 1 (provider)" / "subnet id 2 (metal)" -- the PRE-D-052 + two-plane numbering. lib-net.sh:9 records that the D-052 cutover renumbered subnets (metal- + internal moved id 6 -> 10), so a hardcoded `read 1` may now read the WRONG subnet. FIX (for + completion consolidation): replace `subnet read 1` / the "subnet id N" prose with CIDR-based + discovery (select(.cidr=="10.12.4.0/22")), exactly as scripts/phase-04-network-verify.sh does; + cross-ref the verify script from the do-doc's CHECK block. Not yet applied to the do-doc. + ### Next-free numbers -Design decision: D-056. Doc fix: DOCFIX-047. +Design decision: D-056. Doc fix: DOCFIX-048. diff --git a/scripts/phase-04-network-verify.sh b/scripts/phase-04-network-verify.sh new file mode 100644 index 0000000..98e25d7 --- /dev/null +++ b/scripts/phase-04-network-verify.sh @@ -0,0 +1,162 @@ +#!/usr/bin/env bash +# scripts/phase-04-network-verify.sh +# +# Read-only verify for phase-04 (Neutron external provider network + FIP subnet). +# Two gates, both safe to re-run; mutates NOTHING: +# +# PRE (verify-before-mutate for Step 4.1): the MAAS provider subnet is discovered +# BY CIDR (lib-net PATTERN-1 -- subnet IDs drift across cutovers, so the do-doc's +# 'maas admin subnet read 1' is a post-D-052 landmine; this resolves by the +# provider CIDR), its gateway matches the pinned provider gateway, and the FIP +# pool 10.12.5.0-10.12.7.254 is a RESERVED iprange on that subnet (so neutron can +# own the pool without colliding with a MAAS auto-static primary -- KI-P3-001). +# +# POST (auto-detected only if 'provider-ext' exists): the phase-04 EXIT GATE asserts -- +# network external=true / type=flat / physnet1 / NOT shared (Option B isolation); +# subnet cidr / gateway / no-dhcp / FIP allocation pool. 
+# +# Requires: jumphost; jq; admin-openrc sourced (OS_AUTH_URL set); the 'admin' MAAS profile. +# NEVER runs 'maas list' (it prints the API key -- DOCFIX-016). +# +# Usage: source ~/admin-openrc && scripts/phase-04-network-verify.sh +# Exit: 0 PROCEED (pre clear, network not yet made) or PASS (post clear) +# 1 HOLD / FAIL (an assertion failed) +# 2 precondition (jq/openstack/maas missing, openrc not sourced, MAAS not logged in, OpenStack API not answering) +# +# Resolves dynamically; the only literals are the D-003 design values (provider CIDR, +# FIP pool, neutron object names), each carrying provenance. Read-only. ASCII + LF. + +set -euo pipefail +shopt -s inherit_errexit 2>/dev/null || true +IFS=$'\n\t' + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=scripts/lib-net.sh +. "$SCRIPT_DIR/lib-net.sh" + +# --- phase-04 design literals (D-003; TAG: confirm per site on rebuild) -------------- +PROVIDER_CIDR="10.12.4.0/22" # provider-public plane (lib-net PLANE_CIDRS[0]) +GW_EXPECT="${PLANE_GW[$PROVIDER_CIDR]}" # 10.12.4.1 -- pinned; asserted against live MAAS +FIP_START="10.12.5.0" +FIP_END="10.12.7.254" +EXT_NET="provider-ext" +EXT_SUBNET="provider-ext-fip" +PHYSNET="physnet1" + +FATAL=0 +fail() { echo "FAIL: $*" >&2; FATAL=$((FATAL + 1)); } +pass() { echo "PASS: $*"; } + +# --- preconditions ------------------------------------------------------------------ +need_jq || exit 2 +command -v openstack >/dev/null 2>&1 || { echo "FAIL: openstack client not found" >&2; exit 2; } +command -v maas >/dev/null 2>&1 || { echo "FAIL: maas client not found" >&2; exit 2; } +[ -n "${OS_AUTH_URL:-}" ] || { echo "FAIL: OS_AUTH_URL unset -- 'source ~/admin-openrc' first" >&2; exit 2; } + +# MAAS 'admin' profile must be usable (read-only; NOT 'maas list' -- DOCFIX-016). 
+SUBNETS_JSON="$(maas admin subnets read 2>/dev/null || true)" +printf '%s' "$SUBNETS_JSON" | jq -e 'type=="array"' >/dev/null 2>&1 \ + || { echo "FAIL: 'maas admin subnets read' did not return JSON (profile 'admin' logged in?)" >&2; exit 2; } +# OpenStack must answer too (read-only probe): the POST gate reads a failed 'network show' as +# "not created yet", so an auth/endpoint failure has to stop here instead of reporting PROCEED. +openstack network list --name "$EXT_NET" -f value -c ID >/dev/null 2>&1 \ + || { echo "FAIL: 'openstack network list' failed (credentials valid? endpoint reachable?)" >&2; exit 2; } + +echo "=== phase-04 network-carve verify (read-only) ===" +echo + +# --- PRE A: discover the provider subnet BY CIDR (PATTERN-1) ------------------------ +echo "--- PRE: MAAS provider subnet (by CIDR $PROVIDER_CIDR; never by hardcoded id) ---" +mapfile -t SROW < <(printf '%s' "$SUBNETS_JSON" \ + | jq -r --arg c "$PROVIDER_CIDR" '.[] | select(.cidr==$c) | "\(.id)\t\(.gateway_ip)"') +if [ "${#SROW[@]}" -eq 0 ]; then + fail "no MAAS subnet with cidr $PROVIDER_CIDR (provider plane missing?)" + echo "Summary: HOLD (provider subnet not found)"; exit 1 +elif [ "${#SROW[@]}" -gt 1 ]; then + fail "multiple MAAS subnets match cidr $PROVIDER_CIDR (ambiguous): ${SROW[*]}" + echo "Summary: HOLD (ambiguous provider subnet)"; exit 1 +fi +SID="${SROW[0]%%$'\t'*}" +GW_LIVE="${SROW[0]##*$'\t'}" +echo " provider subnet id=$SID (discovered by CIDR) gateway=$GW_LIVE" + +# --- PRE B: gateway assertion ------------------------------------------------------- +if [ "$GW_LIVE" = "$GW_EXPECT" ]; then + pass "provider gateway $GW_LIVE matches pinned $GW_EXPECT" +else + fail "provider gateway $GW_LIVE != pinned $GW_EXPECT (lib-net PLANE_GW)" +fi + +# --- PRE C: FIP pool reserved on the provider subnet -------------------------------- +echo "--- PRE: FIP pool $FIP_START-$FIP_END must be a RESERVED iprange on subnet $SID ---" +IPR_JSON="$(maas admin ipranges read 2>/dev/null || true)" +if ! 
printf '%s' "$IPR_JSON" | jq -e 'type=="array"' >/dev/null 2>&1; then + fail "'maas admin ipranges read' did not return JSON" +else + echo " reserved ranges on subnet $SID:" + printf '%s' "$IPR_JSON" | jq -r --argjson s "$SID" \ + '.[] | select(.type=="reserved" and .subnet.id==$s) | " \(.start_ip)-\(.end_ip) [\(.comment // "")]"' \ + || true + if printf '%s' "$IPR_JSON" | jq -e --argjson s "$SID" --arg a "$FIP_START" --arg b "$FIP_END" \ + 'any(.[]; .type=="reserved" and .subnet.id==$s and .start_ip==$a and .end_ip==$b)' >/dev/null 2>&1; then + pass "FIP pool $FIP_START-$FIP_END is RESERVED on subnet $SID (neutron can own it)" + else + fail "FIP pool $FIP_START-$FIP_END is NOT a reserved iprange on subnet $SID (phase-00 carve missing -- KI-P3-001 risk)" + fi +fi +echo + +# --- POST: phase-04 EXIT GATE (only if the neutron network already exists) ---------- +echo "--- POST: neutron provider network (asserts only if it exists) ---" +POST_PRESENT=0 +if NET_JSON="$(openstack network show "$EXT_NET" -f json 2>/dev/null)"; then + POST_PRESENT=1 + ext="$(printf '%s' "$NET_JSON" | jq -r '."router:external"')" + ntype="$(printf '%s' "$NET_JSON" | jq -r '."provider:network_type"')" + pnet="$(printf '%s' "$NET_JSON" | jq -r '."provider:physical_network"')" + shared="$(printf '%s' "$NET_JSON" | jq -r '.shared')" + echo " network $EXT_NET: external=$ext type=$ntype physnet=$pnet shared=$shared" + [ "$ext" = "true" ] || fail "$EXT_NET external != true (got '$ext')" + [ "$ntype" = "flat" ] || fail "$EXT_NET type != flat (got '$ntype')" + [ "$pnet" = "$PHYSNET" ] || fail "$EXT_NET physnet != $PHYSNET (got '$pnet')" + [ "$shared" = "false" ] || fail "$EXT_NET shared != false (Option B isolation; got '$shared')" + + if SUB_JSON="$(openstack subnet show "$EXT_SUBNET" -f json 2>/dev/null)"; then + cidr="$(printf '%s' "$SUB_JSON" | jq -r '.cidr')" + sgw="$(printf '%s' "$SUB_JSON" | jq -r '.gateway_ip')" + dhcp="$(printf '%s' "$SUB_JSON" | jq -r '.enable_dhcp')" + # allocation_pools 
shape varies by client version: list of {start,end}, + # list of "start-end" strings, or a single string. Accept all three, but EXACTLY: the FIP pool must be the ONLY pool (an extra pool would hand neutron addresses MAAS never reserved -- KI-P3-001) and start/end must be equal, not substrings. + poolmatch="$(printf '%s' "$SUB_JSON" | jq -r --arg a "$FIP_START" --arg b "$FIP_END" ' + (.allocation_pools // empty) as $p + | if ($p|type)=="array" + then ($p|length)==1 and ($p[0] | + (type=="object" and .start==$a and .end==$b) + or (type=="string" and . == "\($a)-\($b)")) + elif ($p|type)=="string" then $p == "\($a)-\($b)" + else false end')" + echo " subnet $EXT_SUBNET: cidr=$cidr gateway=$sgw dhcp=$dhcp fip-pool-match=$poolmatch" + [ "$cidr" = "$PROVIDER_CIDR" ] || fail "$EXT_SUBNET cidr != $PROVIDER_CIDR (got '$cidr')" + [ "$sgw" = "$GW_EXPECT" ] || fail "$EXT_SUBNET gateway != $GW_EXPECT (got '$sgw')" + [ "$dhcp" = "false" ] || fail "$EXT_SUBNET enable_dhcp != false (got '$dhcp')" + [ "$poolmatch" = "true" ] || fail "$EXT_SUBNET allocation_pool != $FIP_START-$FIP_END" + else + fail "network $EXT_NET exists but subnet $EXT_SUBNET is MISSING" + fi +else + echo " $EXT_NET not created yet -- PRE gate is the operative check (run Step 4.1 to create)." +fi +echo + +# --- verdict ------------------------------------------------------------------------ +if [ "$FATAL" -ne 0 ]; then + echo "Summary: HOLD/FAIL -- $FATAL assertion(s) failed above." + exit 1 +fi +if [ "$POST_PRESENT" -eq 1 ]; then + echo "Summary: PASS -- phase-04 EXIT GATE met (provider-ext + provider-ext-fip correct)." +else + echo "Summary: PROCEED -- PRE gate clear (FIP pool reserved, provider gateway pinned)." + echo " Next: run phase-04 Step 4.1 (create provider-ext + provider-ext-fip), then re-run this." +fi +exit 0 diff --git a/tests/phase-04/fakebin/maas b/tests/phase-04/fakebin/maas new file mode 100644 index 0000000..752fdb8 --- /dev/null +++ b/tests/phase-04/fakebin/maas @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# fake maas: only 'maas admin subnets read' and 'maas admin ipranges read' are emulated, +# each emitting the env-pointed fixture. 
Anything else emits nothing. Never emulates +# 'maas list' (the real one prints the API key -- DOCFIX-016; the verify must not call it). +prof="${1:-}"; obj="${2:-}"; act="${3:-}" +if [ "$prof" = "admin" ] && [ "$obj" = "subnets" ] && [ "$act" = "read" ]; then + cat "${FIX_SUBNETS:?FIX_SUBNETS not set}" +elif [ "$prof" = "admin" ] && [ "$obj" = "ipranges" ] && [ "$act" = "read" ]; then + cat "${FIX_IPRANGES:?FIX_IPRANGES not set}" +fi diff --git a/tests/phase-04/fakebin/openstack b/tests/phase-04/fakebin/openstack new file mode 100644 index 0000000..0ae54c2 --- /dev/null +++ b/tests/phase-04/fakebin/openstack @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# fake openstack: emulates 'network show -f json' and 'subnet show -f json'. +# Emits the env-pointed fixture (exit 0) when set+present, else a not-found error (exit 1), +# so the verify's "network not created yet" branch can be exercised. +cmd="${1:-}"; sub="${2:-}"; name="${3:-}" +if [ "$cmd" = "network" ] && [ "$sub" = "show" ]; then + if [ -n "${FIX_NET:-}" ] && [ -f "${FIX_NET}" ]; then cat "$FIX_NET"; exit 0; fi + echo "No Network found for $name" >&2; exit 1 +elif [ "$cmd" = "subnet" ] && [ "$sub" = "show" ]; then + if [ -n "${FIX_SUBNET:-}" ] && [ -f "${FIX_SUBNET}" ]; then cat "$FIX_SUBNET"; exit 0; fi + echo "No Subnet found for $name" >&2; exit 1 +fi +exit 0 diff --git a/tests/phase-04/make_fixtures.py b/tests/phase-04/make_fixtures.py new file mode 100644 index 0000000..17e0b46 --- /dev/null +++ b/tests/phase-04/make_fixtures.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# tests/phase-04/make_fixtures.py OUTDIR +# Emit MAAS (subnets/ipranges) + neutron (network/subnet) JSON fixtures mirroring the +# real schemas the phase-04 verify reads. The provider subnet uses id=7 (NOT 1) on +# purpose: it proves the script discovers BY CIDR, id-independently (PATTERN-1). 
+import json, os, sys + +out = sys.argv[1] +os.makedirs(out, exist_ok=True) +def w(name, obj): + with open(os.path.join(out, name), "w") as f: + json.dump(obj, f, indent=2) + +PROVIDER_ID = 7 # deliberately not 1 + +def subnet(i, cidr, gw, name): + return {"id": i, "cidr": cidr, "gateway_ip": gw, "name": name, + "vlan": {"id": 5000 + i, "vid": 0}} + +# --- MAAS subnets --- +w("subnets-good.json", [ + subnet(PROVIDER_ID, "10.12.4.0/22", "10.12.4.1", "provider-public"), + subnet(8, "10.12.8.0/22", "10.12.8.1", "metal-admin"), + subnet(10, "10.12.12.0/22", None, "metal-internal"), +]) +w("subnets-badgw.json", [ + subnet(PROVIDER_ID, "10.12.4.0/22", "10.12.4.254", "provider-public"), # wrong gw + subnet(8, "10.12.8.0/22", "10.12.8.1", "metal-admin"), +]) +w("subnets-noprovider.json", [ + subnet(8, "10.12.8.0/22", "10.12.8.1", "metal-admin"), + subnet(10, "10.12.12.0/22", None, "metal-internal"), +]) + +def ipr(i, t, a, b, sid, comment=""): + return {"id": i, "type": t, "start_ip": a, "end_ip": b, + "comment": comment, "subnet": {"id": sid}} + +# --- MAAS ipranges --- reserved FIP pool present (good) vs absent (nofip) +common = [ + ipr(1, "dynamic", "10.12.9.0", "10.12.11.254", 8, "maas pxe/dhcp"), + ipr(2, "reserved", "10.12.4.2", "10.12.4.63", PROVIDER_ID, "provider VIP /26"), + ipr(4, "reserved", "10.12.8.2", "10.12.8.63", 8, "metal VIP /26"), +] +w("ipranges-good.json", common + [ + ipr(3, "reserved", "10.12.5.0", "10.12.7.254", PROVIDER_ID, "FIP pool / ext_net (D-003)"), +]) +w("ipranges-nofip.json", common) # FIP pool reservation missing + +# --- neutron network show -f json --- +w("net-good.json", { + "name": "provider-ext", "router:external": True, + "provider:network_type": "flat", "provider:physical_network": "physnet1", + "shared": False, "tags": ["role=provider"], +}) +w("net-bad-shared.json", { + "name": "provider-ext", "router:external": True, + "provider:network_type": "flat", "provider:physical_network": "physnet1", + "shared": True, "tags": 
["role=provider"], # WRONG: shared breaks Option B +}) + +# --- neutron subnet show -f json --- two allocation_pools shapes (prove tolerance) +def sub(pools): + return {"name": "provider-ext-fip", "cidr": "10.12.4.0/22", + "gateway_ip": "10.12.4.1", "enable_dhcp": False, + "allocation_pools": pools, + "tags": ["role=provider", "netbox-iprange=10.12.5.0-10.12.7.254"]} +w("subnet-good-objpool.json", sub([{"start": "10.12.5.0", "end": "10.12.7.254"}])) +w("subnet-good-strpool.json", sub(["10.12.5.0-10.12.7.254"])) + +print("fixtures written to", out) diff --git a/tests/phase-04/run-tests.sh b/tests/phase-04/run-tests.sh new file mode 100644 index 0000000..f22f966 --- /dev/null +++ b/tests/phase-04/run-tests.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# tests/phase-04/run-tests.sh -- offline regression for phase-04-network-verify.sh. +# Behavior-tests the REAL script against fake maas/openstack data shims + real jq. +# No live infra. Needs python3, bash, and jq on PATH. +set -euo pipefail +IFS=$'\n\t' +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCRIPTS="$(cd "$HERE/../../scripts" && pwd)" +TARGET="$SCRIPTS/phase-04-network-verify.sh" +BIN="$HERE/fakebin" + +command -v python3 >/dev/null 2>&1 || { echo "FAIL: python3 required" >&2; exit 1; } +command -v jq >/dev/null 2>&1 || { echo "FAIL: jq required (install jq)" >&2; exit 1; } +[ -f "$TARGET" ] || { echo "FAIL: target missing: $TARGET" >&2; exit 1; } +chmod +x "$BIN/maas" "$BIN/openstack" 2>/dev/null || true # git/Windows drops exec bits + +WORK="$(mktemp -d)"; trap 'rm -rf "$WORK"' EXIT +python3 "$HERE/make_fixtures.py" "$WORK" >/dev/null +rc_all=0 + +# run