Newer
Older
openstack-caracal-ipv4 / scripts / phase-00-maas-standup.sh
#!/usr/bin/env bash
# scripts/phase-00-maas-standup.sh [--apply]
#
# MAAS topology stand-up for the v1 (VR0 / Baldurkeep) plane scheme (D-052 / D-053).
# Idempotently brings MAAS to the target fabric/VLAN/subnet/space layout so the
# carve + bundle can resolve every plane. Useful BOTH for a fresh test cloud
# (everything absent -> full create plan) and for an existing cloud (present-and-
# correct -> SKIP; present-but-wrong -> reported as DRIFT, never silently changed).
#
# Default is DRY-RUN (the audit): resolves every id live BY CIDR/NAME (PATTERN-1,
# no hardcoded MAAS ids) and prints each mutation it WOULD run, changing nothing.
# Pass --apply to execute. Re-runnable; anything already correct is SKIPped.
#
# SAFETY: this script NEVER deletes. The target is the D-052/D-053 plane scheme,
# which the live test cloud already matches -- so a dry-run reports "no drift" and
# --apply is a no-op here. The refuse-to-clobber guard remains for a FRESH cloud
# (Roosevelt): if a subnet is present at a target CIDR but bound to the WRONG plane
# or VID, that is a destructive re-CIDR (MAAS cannot change a CIDR in place) and is
# OUT OF SCOPE -- it is reported in the DRIFT section and gated to a human teardown
# step. This script will refuse to create a target subnet whose CIDR is occupied
# by the wrong plane.
#
# SINGLE MAAS-address authority: owns topology (fabric/VLAN/subnet/space/gateway/
# managed/dns) AND every reserved range -- API-VIP bands, the Neutron FIP pool, and
# mgmt reserves. phase-00-maas-carve.sh is RETIRED: its FIP/VIP/mgmt reserves are
# folded in here.
#
# Order matters (MAAS semantics + fresh-fabric bootstrap): untagged base planes
# first (each owns a fabric), then their tagged sibling rides that fabric, so a
# fresh MAAS bootstraps metal-admin -> metal-internal (VID 103). provider-public
# carries the public API VIPs + the FIP pool on one untagged plane (Pattern A).
#
# Exit: 0 ok (no drift) | 1 fatal or unresolved drift | 2 precondition
# CLI forms verified against Canonical MAAS how-to-manage-networks.
# ASCII + LF.

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail
# Let command substitutions inherit errexit; shells that do not know the option
# reject it, which is tolerated on purpose.
shopt -s inherit_errexit 2>/dev/null || true

# Absolute directory of this script, so the library is found from any cwd.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
# shellcheck source=scripts/lib-net.sh
source "$SCRIPT_DIR/lib-net.sh"

# MAAS CLI profile to drive; overridable from the environment.
MAAS_PROFILE="${MAAS_PROFILE:-admin}"
# Only a first argument of exactly --apply enables mutations; anything else
# (including no argument) stays in dry-run.
case "${1:-}" in
  --apply) MODE="apply" ;;
  *)       MODE="dryrun" ;;
esac
# Counters read by the result section: hard failures and drift findings.
FATAL=0
DRIFT=0

# hdr <text...>: print a blank line followed by a "=== text ===" section banner.
hdr() {
  printf '\n=== %s ===\n' "$*"
}
# note <text...>: print one indented "  - text" bullet on stdout.
note() {
  printf '  - %s\n' "$*"
}
# fail <text...>: report "FAIL: text" on stderr and bump the global FATAL
# counter. Does not exit; the caller decides whether to continue.
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  FATAL=$(( FATAL + 1 ))
}
# Precondition: jq is required (need_jq is not defined in this file; presumably
# it comes from the sourced lib-net.sh). Exit 2 so the status matches the exit
# contract in the header (2 = precondition); 1 stays reserved for fatal errors
# and unresolved drift.
need_jq || exit 2
# read wrapper: run `maas <profile> <args...>` and print its output only when it
# is a non-empty, well-formed JSON document; print "[]" otherwise. A stray MAAS
# error (e.g. a bogus/absent fabric id) can therefore never crash us under
# set -e, and callers always receive something jq can parse. Always returns 0.
maas_json() {
  local out
  # stderr is dropped and a non-zero exit tolerated on purpose: a failed read is
  # reported to the caller as "[]". stdin is detached because this runs inside
  # `while read` loops and must never consume the loop's input.
  out="$(maas "$MAAS_PROFILE" "$@" 2>/dev/null </dev/null || true)"
  # `jq -e type` fails on unparsable input AND on input that holds no JSON value
  # at all (exit 4), so a silent failure with empty output also maps to "[]".
  if [ -n "$out" ] && printf '%s' "$out" | jq -e 'type' >/dev/null 2>&1; then
    printf '%s' "$out"
  else
    printf '[]'
  fi
}

# emit <desc> <maas args...>
# The single gate for every mutation.
#   dry-run: prints the description and the maas command line, changes nothing.
#   apply:   runs `maas <profile> <args...>`; on failure records it via fail,
#            shows up to three lines of MAAS output on stderr and returns 1.
# Callers invoke emit as a bare statement, so under `set -e` a return of 1
# stops the script at the failed mutation.
emit() {
  local desc="$1"
  shift
  case "$MODE" in
    apply)
      echo "  DO: $desc"
      local out
      if out="$(maas "$MAAS_PROFILE" "$@" 2>&1)"; then
        return 0
      fi
      fail "$desc"
      # The filter pipeline stays inside the printf argument: as a standalone
      # assignment an empty grep result would trip errexit/pipefail.
      printf '       MAAS said: %s\n' "$(printf '%s' "$out" | grep -viE '^(Success|Machine-readable)' | head -3 | tr '\n' ' ')" >&2
      return 1
      ;;
    *)
      printf '  WOULD: %s\n' "$desc"
      printf '         maas %s %s\n' "$MAAS_PROFILE" "$*"
      ;;
  esac
}

# --- PATTERN-1 resolve-by-CIDR/name helpers (no hardcoded ids) ---------------
# Subnet lookups: each one re-reads the live subnet list, selects the entry
# whose cidr equals $1 and prints one attribute of it (first match only).
# Nothing is printed when no subnet matches.

# _subnet_pick <cidr> <jq projection> [extra jq options...]
_subnet_pick() {
  local cidr="$1" projection="$2"
  shift 2
  maas_json subnets read \
    | jq -r --arg c "$cidr" "$@" ".[]|select(.cidr==\$c)|${projection}" \
    | head -1
}

sub_id()    { _subnet_pick "$1" '.id'; }                             # subnet id
sub_vid()   { _subnet_pick "$1" '(.vlan.vid|tostring)'; }            # VID of its VLAN
sub_fabid() { _subnet_pick "$1" '(.vlan.fabric_id|tostring)'; }      # fabric id of its VLAN
sub_space() { _subnet_pick "$1" '(.space // "")'; }                  # space, "" when null
sub_field() { _subnet_pick "$1" '(.[$f] // "")' --arg f "$2"; }      # field $2, "" when null/false
sub_mtu()   { _subnet_pick "$1" '(.vlan.mtu|tostring)'; }            # MTU of its VLAN
sub_dns()   { _subnet_pick "$1" '(.dns_servers // []|join(","))'; }  # dns_servers, comma-joined
# _id_by_name <collection> <name>: read the given MAAS collection (spaces or
# fabrics) and print the id of the first entry whose name equals <name>;
# prints nothing when there is no such entry.
_id_by_name() {
  local collection="$1" wanted="$2"
  maas_json "$collection" read \
    | jq -r --arg n "$wanted" '.[]|select(.name==$n)|(.id|tostring)' \
    | head -1
}
space_id()   { _id_by_name spaces "$1"; }    # id of the space named $1
fab_byname() { _id_by_name fabrics "$1"; }   # id of the fabric named $1
# VLAN lookups on one fabric. $1 = fabric id, $2 = VID. A fabric argument that
# starts with "<" is a dry-run placeholder for something not created yet, so
# nothing is queried and nothing is printed for it.

# _vlan_pick <fabric> <vid> <jq projection>: first VLAN on <fabric> with <vid>.
_vlan_pick() {
  maas_json vlans read "$1" \
    | jq -r --arg v "$2" ".[]|select((.vid|tostring)==\$v)|$3" \
    | head -1
}

# vlanobj <fabric> <vid>: print the id of that VLAN object.
vlanobj() {
  case "$1" in
    "<"*) return ;;
  esac
  _vlan_pick "$1" "$2" '(.id|tostring)'
}

# vlanspace <fabric> <vid>: print the space of that VLAN ("" when null).
vlanspace() {
  case "$1" in
    "<"*) return ;;
  esac
  _vlan_pick "$1" "$2" '(.space // "")'
}

# vlan0obj <fabric>: the untagged (vid 0) default VLAN of a fabric.
vlan0obj() {
  vlanobj "$1" 0
}

# --- target plane table (D-052/D-053) -----------------------------------------
# One row per plane, "|"-separated:
#   name|cidr|kind|vid|parent_cidr|gw|dnssrc|reserves
#   kind     untagged = owns a fabric; tagged = rides parent_cidr's fabric on <vid>
#   dnssrc   a CIDR whose dns_servers to mirror
#   reserves ";"-separated "lo:hi:label" entries; label has no : ; |
#   "-" in any column = none.
# The delimiter is quoted so the rows are taken literally (no expansion).
PLANES="$(cat <<'TBL'
provider-public|10.12.4.0/22|untagged|-|-|10.12.4.1|-|10.12.4.2:10.12.4.100:provider-public API HA VIP band (D-052/D-053);10.12.4.101:10.12.4.110:provider-public mgmt reserve;10.12.5.0:10.12.7.254:Neutron external FIP pool (D-003)
metal-admin|10.12.8.0/22|untagged|-|-|10.12.8.1|-|10.12.8.2:10.12.8.100:metal-admin API HA VIP band (D-052/D-053);10.12.8.101:10.12.8.110:metal-admin mgmt reserve
metal-internal|10.12.12.0/22|tagged|103|10.12.8.0/22|-|-|10.12.12.2:10.12.12.100:metal-internal API HA VIP band (D-052/D-053)
data-tenant|10.12.16.0/22|untagged|-|-|-|-|-
storage|10.12.32.0/22|untagged|-|-|-|-|-
replication|10.12.36.0/22|untagged|-|-|-|-|-
TBL
)"

# dt <value>: decode the table's "-" sentinel to an empty line; any other value
# is echoed back unchanged.
dt() {
  if [ "$1" = "-" ]; then
    echo ""
  else
    echo "$1"
  fi
}

hdr "MAAS stand-up  mode=$MODE  (D-052/D-053 target scheme)"

# ------------------------------------------------------------------ DRIFT scan
# Refuse-to-clobber pass, read-only. A live subnet sitting at a TARGET cidr but
# reporting a different space than the plane name, or (tagged planes only) a
# different VID, is a re-CIDR/migration the human must resolve. Such CIDRs are
# remembered in WRONG_CIDR and counted in DRIFT; nothing is changed here.
# NOTE(review): MAAS is believed to report an unassigned space as the literal
# string "undefined"; if so, an existing subnet with no space yet is flagged
# here as wrong-plane rather than left for the stand-up to assign -- confirm.
hdr "drift scan (non-destructive; reports re-CIDR/migration needs)"
declare -A WRONG_CIDR=()
while IFS='|' read -r name cidr kind vid _rest; do
  if [ -z "$name" ]; then
    continue
  fi
  curspace="$(sub_space "$cidr")"
  if [ -z "$curspace" ]; then
    continue    # no space reported (subnet absent or space null) -> nothing to drift
  fi
  if [ "$curspace" != "$name" ]; then
    note "DRIFT: $cidr is space '$curspace' but the D-052/D-053 scheme assigns it to '$name' -- MIGRATE (delete+recreate; gated, NOT done here)"
    WRONG_CIDR["$cidr"]=1
    DRIFT=$((DRIFT+1))
  elif [ "$kind" = tagged ]; then
    # space matches; a tagged plane must also sit on the target VID
    cv="$(sub_vid "$cidr")"
    if [ "$cv" != "$vid" ]; then
      note "DRIFT: $cidr space ok ('$name') but VID '$cv' != target $vid -- MIGRATE (gated)"
      WRONG_CIDR["$cidr"]=1
      DRIFT=$((DRIFT+1))
    fi
  fi
done <<< "$PLANES"
if [ "$DRIFT" -eq 0 ]; then
  note "no drift: no target CIDR is occupied by the wrong plane"
fi

# ------------------------------------------------------------- per-plane standup
# Walk the PLANES table top to bottom. For every row: resolve (or plan) the
# fabric, ensure the space, ensure the VLAN and its space binding, ensure the
# subnet, then gateway/managed/dns, and finally the reserved ranges. Each step
# re-reads live MAAS state first, so anything already in place is a SKIP. In
# dry-run, ids that do not exist yet are shown as "<...>" placeholders.
while IFS='|' read -r name cidr kind vid parent gw dnssrc reserves; do
  [ -n "$name" ] || continue
  gw="$(dt "$gw")"; dnssrc="$(dt "$dnssrc")"; reserves="$(dt "$reserves")"
  # vid is "-" (never empty) on untagged rows, so it is only shown for tagged
  # planes; untagged headers carry no trailing blank.
  if [ "$kind" = tagged ]; then
    hdr "plane $name ($cidr, $kind VID $vid)"
  else
    hdr "plane $name ($cidr, $kind)"
  fi

  # refuse to build onto a CIDR the drift scan flagged as the wrong plane
  if [ -n "${WRONG_CIDR[$cidr]:-}" ]; then
    fail "$cidr occupied by the wrong plane (see DRIFT) -- resolve the migration first; skipping $name"
    continue
  fi

  # ---- resolve the fabric this plane lives on ----
  # existing subnet -> its own fabric; tagged -> the parent subnet's fabric;
  # untagged -> the fabric named after the plane (created when missing).
  fab=""
  if [ -n "$(sub_id "$cidr")" ]; then
    fab="$(sub_fabid "$cidr")"
  elif [ "$kind" = tagged ]; then
    fab="$(sub_fabid "$parent")"
    if [ -z "$fab" ]; then
      [ "$MODE" = apply ] && { fail "$name: parent subnet $parent absent -- create it first"; continue; }
      fab="<fabric-of-$parent>"   # dry-run: parent is planned above; show the plan
    fi
  else
    fab="$(fab_byname "$name")"
    if [ -z "$fab" ]; then
      emit "create fabric $name" fabrics create name="$name"
      fab="$(fab_byname "$name")"               # re-resolve (apply: now exists)
      [ -n "$fab" ] || fab="<fabric-$name-id>"  # dry-run placeholder
    fi
  fi
  note "fabric = $fab"

  # ---- space ----
  if [ -z "$(space_id "$name")" ]; then
    emit "create space $name" spaces create name="$name"
  else note "space $name exists -- SKIP"; fi
  sid="$(space_id "$name")"; [ -n "$sid" ] || sid="<space-$name-id>"

  # ---- VLAN + the vlan-obj the subnet will ride ----
  if [ "$kind" = tagged ]; then
    if [ -z "$(vlanobj "$fab" "$vid")" ]; then
      # new tagged VLAN takes the parent subnet's VLAN MTU, 1500 when unknown
      mtu="$(sub_mtu "$parent")"; { [ -n "$mtu" ] && [ "$mtu" != null ]; } || mtu="1500"
      emit "create VLAN vid=$vid name=$name mtu=$mtu on fabric $fab" \
        vlans create "$fab" name="$name" vid="$vid" mtu="$mtu"
    else note "VID $vid on fabric $fab exists -- SKIP"; fi
    if [ "$(vlanspace "$fab" "$vid")" != "$name" ]; then
      emit "assign fabric $fab vid $vid -> space $name (id $sid)" vlan update "$fab" "$vid" space="$sid"
    else note "VID $vid already in space $name -- SKIP"; fi
    vobj="$(vlanobj "$fab" "$vid")"; [ -n "$vobj" ] || vobj="<vid-$vid-on-fab-$fab-id>"
  else
    # untagged: rides the fabric default (vid 0); assign that vid-0 VLAN to the space
    if [ "$(vlanspace "$fab" 0)" != "$name" ]; then
      emit "assign fabric $fab untagged(vid 0) -> space $name (id $sid)" vlan update "$fab" 0 space="$sid"
    else note "untagged VLAN on fabric $fab already in space $name -- SKIP"; fi
    vobj="$(vlan0obj "$fab")"; [ -n "$vobj" ] || vobj="<untagged-vlan-on-fab-$fab-id>"
  fi

  # ---- subnet (guard wrong-VID if present) ----
  if [ -z "$(sub_id "$cidr")" ]; then
    emit "create subnet $cidr vlan=$vobj" subnets create cidr="$cidr" vlan="$vobj"
  else
    if [ "$kind" = tagged ]; then
      got="$(sub_vid "$cidr")"
      [ "$got" = "$vid" ] || { fail "subnet $cidr exists on VID '$got', expected $vid -- refusing"; continue; }
    fi
    note "subnet $cidr exists -- SKIP create"
  fi

  # ---- gateway / managed / dns ----
  if [ -n "$gw" ]; then
    if [ "$(sub_field "$cidr" gateway_ip)" != "$gw" ]; then
      emit "subnet $cidr -> gateway_ip=$gw" subnet update "$cidr" gateway_ip="$gw"
    else note "gateway_ip already $gw -- SKIP"; fi
  fi
  if [ "$(sub_field "$cidr" managed)" != "true" ]; then
    emit "subnet $cidr -> managed=true" subnet update "$cidr" managed=true
  else note "subnet $cidr already managed -- SKIP"; fi
  if [ -n "$dnssrc" ]; then
    dns="$(sub_dns "$dnssrc")"
    if [ -n "$dns" ] && [ "$dns" != null ]; then
      if [ "$(sub_dns "$cidr")" != "$dns" ]; then
        emit "subnet $cidr -> dns_servers=$dns (mirrors $dnssrc)" subnet update "$cidr" dns_servers="$dns"
      else note "dns_servers already $dns -- SKIP"; fi
    else note "dns source $dnssrc has no dns_servers -- leaving $cidr dns unset"; fi
  fi

  # ---- reserved ranges (API-VIP bands, FIP pool, mgmt) -- D-058 single authority ----
  if [ -n "$reserves" ]; then
    IPR="$(maas_json ipranges read)"   # one read per plane, reused for every entry
    IFS=';' read -ra RES <<< "$reserves"
    for r in "${RES[@]}"; do
      # split "lo:hi:label"
      rlo="${r%%:*}"; rrest="${r#*:}"; rhi="${rrest%%:*}"; rlabel="${rrest#*:}"
      if printf '%s' "$IPR" | jq -e --arg lo "$rlo" '.[]|select(.start_ip==$lo)' >/dev/null 2>&1; then
        # A range is recognised by its start address only. When the live range
        # ends somewhere else, say so instead of skipping silently; it is never
        # resized here and does not affect the exit status.
        rcur="$(printf '%s' "$IPR" | jq -r --arg lo "$rlo" '.[]|select(.start_ip==$lo)|(.end_ip // "")' 2>/dev/null | head -1 || true)"
        if [ -n "$rcur" ] && [ "$rcur" != "$rhi" ]; then
          note "WARN: reserved range starting $rlo ends at $rcur, target end is $rhi -- NOT changed here (review by hand)"
        fi
        note "reserved range starting $rlo exists -- SKIP"
      else
        rsid="$(sub_id "$cidr")"; [ -n "$rsid" ] || rsid="<subnet-$cidr-id>"
        emit "create reserved range $rlo-$rhi ($rlabel) on subnet $rsid" \
          ipranges create type=reserved subnet="$rsid" start_ip="$rlo" end_ip="$rhi" comment="$rlabel"
      fi
    done
  fi
done <<< "$PLANES"

# ----------------------------------------------------------------------- result
# Summarise and pick the exit status: failures take precedence over drift, and
# either one ends the run with status 1.
hdr "result"
if (( DRIFT != 0 )); then
  printf '  %s plane(s) need a gated re-CIDR/migration before they can be stood up (see DRIFT).\n' "$DRIFT"
fi
if (( FATAL != 0 )); then
  printf '  completed with %s failure(s)\n' "$FATAL"
  exit 1
fi
if (( DRIFT != 0 )); then
  printf '  OK (%s) -- but exit 1 due to unresolved drift\n' "$MODE"
  exit 1
fi
printf '  OK (%s) -- topology consistent with D-052/D-053\n' "$MODE"