Newer
Older
openstack-caracal-ipv4 / scripts / phase-00-maas-recidr.sh
#!/usr/bin/env bash
# scripts/phase-00-maas-recidr.sh [--apply]
#
# Gated MAAS re-CIDR migration D-052/053 -> D-058 for the planes whose CIDR MOVES:
#   metal-admin     10.12.8.0/22  -> 10.12.12.0/22  (untagged, metal fabric)
#   metal-internal  10.12.12.0/22 -> 10.12.16.0/22  (VID 103,  metal fabric)
#   data-tenant     10.12.16.0/22 -> 10.12.20.0/22  (untagged, data fabric)
# (provider-vip 10.12.8.0/22 is NEW, not a move -- the standup creates it once .8 is freed.)
#
# REUSE-IN-PLACE: MAAS cannot change a subnet's CIDR, so each plane is migrated by
# deleting the old subnet and recreating it at the new CIDR on the SAME fabric + SAME
# VLAN. The VLAN (and its space assignment) persists across the subnet delete, so the
# new subnet inherits the correct space with no space/VLAN edits. Existing fabrics are
# kept (no orphaned fabrics).
#
# Default is DRY-RUN (audit): resolves everything live BY CIDR (PATTERN-1, no hardcoded
# ids), verifies each old subnet is on its expected space + VLAN, surfaces the metal/data
# fabric ids, lists reserved ranges + any live IP allocations, and prints the plan. Pass
# --apply to execute. COLLISION-SAFE: all old subnets are deleted BEFORE any new subnet is
# created (each new CIDR is the old CIDR of another plane, freed by the deletes).
#
# This script does ONLY the destructive subnet swap. Gateways, managed, dns, the reserved
# bands, and provider-vip are the standup's job -- run AFTER this:
#     scripts/phase-00-maas-standup.sh --apply        (build provider-vip + reserves + attrs)
#     scripts/phase-00-maas-standup.sh                (dry-run: expect all-SKIP, no drift)
#
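# PRE-REQS: openstack model torn down + hosts released, so the subnets have no live links.
# If MAAS refuses a delete (interfaces still linked), the error is surfaced and we STOP --
# clear the links (release/delete the machines) and re-run. We never force-delete.
# CAVEAT: the audit keys on each plane's OLD cidr, so a re-run only picks up planes whose
# old subnet still exists. A plane whose old subnet was already deleted by a partial run
# is SKIPped by the audit and must be recreated by hand on its existing fabric/VLAN.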
#
# Exit: 0 ok (or nothing to migrate) | 1 fatal / unexpected state | 2 precondition
# CLI forms per Canonical MAAS how-to-manage-networks. ASCII + LF.

# Strict mode: stop on unhandled errors, unset variables, and failed pipeline stages.
set -o errexit -o nounset -o pipefail
# Let command substitutions inherit errexit where this bash supports it (ignored otherwise).
shopt -s inherit_errexit 2>/dev/null || true

# Resolve the directory holding this script so the shared library is found from any cwd.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
# shellcheck source=scripts/lib-net.sh
source "$SCRIPT_DIR/lib-net.sh"

# MAAS CLI profile; overridable from the environment, defaults to "admin".
: "${MAAS_PROFILE:=admin}"

# Only a literal --apply as the first argument enables mutation; anything else is a dry-run.
case "${1:-}" in
  --apply) MODE="apply" ;;
  *)       MODE="dryrun" ;;
esac

# Running count of failures recorded through fail().
FATAL=0

# hdr <text...>: print a blank line followed by a "=== text ===" section banner on stdout.
hdr() {
  printf '\n=== %s ===\n' "$*"
}
# note <text...>: print one indented "  - text" bullet line on stdout.
note() {
  printf '  - %s\n' "$*"
}
# fail <text...>: report "FAIL: text" on stderr and bump the global FATAL counter.
# Does not exit; callers check FATAL at their own gates.
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  FATAL=$(( FATAL + 1 ))
}
# Every MAAS read below is parsed with jq, so bail out early when the check fails.
# need_jq is not defined in this file -- presumably provided by the sourced lib-net.sh.
if ! need_jq; then
  exit 1
fi

# maas_json <maas args...>: read wrapper around `maas "$MAAS_PROFILE" ...`.
# Prints the command's stdout when it is non-blank, valid JSON; otherwise prints "[]".
# stderr and the exit status of maas are deliberately discarded so a stray MAAS error
# never crashes the script under set -e.
# Blank output (the usual shape of a failed call once stderr is dropped) also maps to
# "[]": `jq empty` accepts empty input, so it would otherwise slip through as "".
maas_json() {
  local out
  out="$(maas "$MAAS_PROFILE" "$@" 2>/dev/null || true)"
  if [ -n "${out//[[:space:]]/}" ] && printf '%s' "$out" | jq empty 2>/dev/null; then
    printf '%s' "$out"
  else
    printf '[]'
  fi
}

# emit <desc> <maas args...>
# Dry-run (MODE != apply): print what would be run and return 0.
# Apply: run `maas "$MAAS_PROFILE" <args>`; on failure record it through fail(), echo up
# to three lines of the MAAS reply (minus the CLI's Success/Machine-readable banner lines)
# on stderr, and return 1. Returns 0 on success.
emit() {
  local label="$1"
  shift

  if [ "$MODE" != "apply" ]; then
    echo "  WOULD: $label"
    echo "         maas $MAAS_PROFILE $*"
    return 0
  fi

  echo "  DO: $label"
  local reply
  if reply="$(maas "$MAAS_PROFILE" "$@" 2>&1)"; then
    return 0
  fi

  fail "$label"
  local detail
  # `|| true`: grep finding nothing must not turn into a failure under pipefail/errexit.
  detail="$(printf '%s' "$reply" | grep -viE '^(Success|Machine-readable)' | head -3 | tr '\n' ' ' || true)"
  echo "       MAAS said: $detail" >&2
  return 1
}

# _subnet_field <cidr> <jq-expr>: evaluate <jq-expr> on the subnet object(s) whose .cidr
# equals <cidr> in `subnets read`, printing only the first result line (nothing if absent).
_subnet_field() {
  local cidr="$1" expr="$2"
  maas_json subnets read \
    | jq -r --arg c "$cidr" ".[]|select(.cidr==\$c)|($expr)" \
    | head -1
}

# Live lookups BY CIDR (no hardcoded ids); each prints one value or nothing.
# sub_id <cidr>: subnet id.
sub_id() { _subnet_field "$1" '.id|tostring'; }
# sub_vid <cidr>: VID of the subnet's VLAN.
sub_vid() { _subnet_field "$1" '.vlan.vid|tostring'; }
# sub_fabid <cidr>: fabric id of the subnet's VLAN.
sub_fabid() { _subnet_field "$1" '.vlan.fabric_id|tostring'; }
# sub_space <cidr>: the subnet's .space value, or an empty string when it is null/absent.
sub_space() { _subnet_field "$1" '.space // ""'; }
# vlanobj <fabric-id> <vid>: print the id of the VLAN object with the given VID in the
# output of `vlans read <fabric-id>` (first match only; prints nothing when none matches).
vlanobj() {
  local fabric="$1" vid="$2"
  local pick='.[]|select((.vid|tostring)==$v)|(.id|tostring)'
  maas_json vlans read "$fabric" | jq -r --arg v "$vid" "$pick" | head -1
}
# ipr_ids_on <subnet-id>: print, one per line, the id of every entry in `ipranges read`
# whose .subnet.id equals <subnet-id>.
ipr_ids_on() {
  local subnet_id="$1"
  maas_json ipranges read \
    | jq -r --arg s "$subnet_id" '.[] | select((.subnet.id|tostring) == $s) | (.id|tostring)'
}
# allocs_on <subnet-id>: print, one per line, the .ip (or, failing that, .start_ip) of each
# entry returned by `subnet ip-addresses <subnet-id>`. Best-effort by design: any maas or
# jq error is swallowed, non-array replies print nothing, and the status is always 0.
allocs_on() {
  local subnet_id="$1"
  local pick='if type=="array" then (.[]|.ip // .start_ip // empty) else empty end'
  {
    maas "$MAAS_PROFILE" subnet ip-addresses "$subnet_id" 2>/dev/null \
      | jq -r "$pick" 2>/dev/null
  } || true
}

# --- migration table: one row per plane whose CIDR moves ---
# Columns, '|'-separated: name | old_cidr | new_cidr | kind | vid   (vid=0 for untagged)
MIG="$(printf '%s\n' \
  'metal-admin|10.12.8.0/22|10.12.12.0/22|untagged|0' \
  'metal-internal|10.12.12.0/22|10.12.16.0/22|tagged|103' \
  'data-tenant|10.12.16.0/22|10.12.20.0/22|untagged|0')"

hdr "MAAS re-CIDR  D-052/053 -> D-058  mode=$MODE"
note "reuse-in-place: new subnet created on each plane's EXISTING fabric + VLAN; spaces untouched."

# ---------------------------------------------------------------- AUDIT (capture)
# Facts captured per plane for the later phases, keyed by plane name:
#   M_FAB = fabric id | M_VID = expected VID | M_OLDSUB = old subnet id | M_NEW = target CIDR
# ORDER keeps the planes that still need migrating, in table order; PENDING counts them.
declare -A M_FAB M_VID M_OLDSUB M_NEW
ORDER=()
PENDING=0
hdr "audit (read-only): resolve + verify each migrating plane by its OLD cidr"
while IFS='|' read -r plane old_cidr new_cidr plane_kind table_vid; do
  # Ignore blank table rows.
  if [ -z "$plane" ]; then
    continue
  fi

  # Nothing at the old CIDR: not an error, the plane simply is not migrated by this run.
  old_sub="$(sub_id "$old_cidr")"
  if [ -z "$old_sub" ]; then
    note "$plane: no subnet at old $old_cidr -- already migrated or absent; SKIP"
    continue
  fi

  # The old CIDR may by now belong to a different plane; only touch it if the space matches.
  space_now="$(sub_space "$old_cidr")"
  if [ "$space_now" != "$plane" ]; then
    note "$plane: old $old_cidr is now space '$space_now' (not '$plane') -- already migrated or not this plane; SKIP"
    continue
  fi

  # Tagged planes must sit on the table's VID, untagged planes on VID 0; a mismatch is fatal.
  if [ "$plane_kind" = tagged ]; then
    expect_vid="$table_vid"
  else
    expect_vid=0
  fi
  live_vid="$(sub_vid "$old_cidr")"
  if [ "$live_vid" != "$expect_vid" ]; then
    fail "$plane: subnet $old_cidr on VID '$live_vid', expected $expect_vid -- refusing"
    continue
  fi

  fabric="$(sub_fabid "$old_cidr")"
  range_ids="$(ipr_ids_on "$old_sub" | tr '\n' ' ')"
  live_ips="$(allocs_on "$old_sub" | tr '\n' ' ')"

  M_FAB["$plane"]="$fabric"
  M_VID["$plane"]="$expect_vid"
  M_OLDSUB["$plane"]="$old_sub"
  M_NEW["$plane"]="$new_cidr"
  ORDER+=("$plane")
  PENDING=$((PENDING + 1))

  note "$plane: $old_cidr (subnet $old_sub, fabric $fabric, vid $expect_vid) -> $new_cidr on the SAME fabric/vid"
  # The extra detail lines are only shown when the lists contain something besides spaces.
  if [ -n "${range_ids// }" ]; then
    note "    reserved range ids to delete first: $range_ids"
  fi
  if [ -n "${live_ips// }" ]; then
    note "    NOTE live IP allocations present ($live_ips) -- if a delete is refused, clear these (release/delete machines) and re-run"
  fi
done <<< "$MIG"

hdr "fabric summary (eyeball before any mutation)"
# Both metal planes share one fabric line: prefer metal-admin's id, fall back to
# metal-internal's, and show "?" when neither plane was captured by the audit.
metal_fabric="${M_FAB[metal-admin]:-}"
if [ -z "$metal_fabric" ]; then
  metal_fabric="${M_FAB[metal-internal]:-?}"
fi
data_fabric="${M_FAB[data-tenant]:-?}"
note "metal fabric (metal-admin/metal-internal) = $metal_fabric"
note "data  fabric (data-tenant)                = $data_fabric"
note "provider fabric (provider-vip target, handled by standup) = resolve via provider-public 10.12.4.0/22"

# Gate 1: any audit failure stops the run before a plan is printed or anything is mutated.
if [ "$FATAL" -ne 0 ]; then
  echo
  echo "completed with $FATAL failure(s) -- fix the unexpected state above before proceeding"
  exit 1
fi

# Gate 2: no plane was captured by the audit, so there is nothing to do in either mode.
if [ "$PENDING" -eq 0 ]; then
  hdr "result"
  note "nothing to migrate (no old-scheme subnets present)"
  echo
  echo "OK ($MODE)"
  exit 0
fi

# ------------------------------------------------------------------------- PLAN
# Dry-run ends here: print the ordered plan built from the audit captures, then exit 0.
if [ "$MODE" = dryrun ]; then
  hdr "PLAN (dry-run -- nothing changed)"

  printf '%s\n' "  1) delete (ranges then subnet), all $PENDING old subnets first:"
  for plane in "${ORDER[@]}"; do
    printf '%s\n' "       - $plane  delete subnet ${M_OLDSUB[$plane]} (was the old CIDR)"
  done

  printf '%s\n' "  2) create new subnets on the same fabric/VLAN (collision-free after the deletes):"
  for plane in "${ORDER[@]}"; do
    printf '%s\n' "       - $plane  create ${M_NEW[$plane]} on fabric ${M_FAB[$plane]} vid ${M_VID[$plane]}"
  done

  printf '%s\n' \
    "  3) then: scripts/phase-00-maas-standup.sh --apply   (provider-vip + gateways + dns + reserves)" \
    "          scripts/phase-00-maas-standup.sh            (verify: all-SKIP, no drift)" \
    "" \
    "  re-run with --apply to execute."
  exit 0
fi

# ----------------------------------------------------------------------- MUTATE
# Phase 1: for every captured plane delete its reserved ranges, then the old subnet.
# Each emit is guarded with `|| true` so one refusal does not abort the loop under set -e;
# failures are counted by emit/fail and checked once after the loop.
# NOTE(review): because the loop keeps going after a refused delete, other planes may
# already be deleted when we STOP. The audit SKIPs a plane with no subnet at its old CIDR,
# so a re-run would not recreate those planes -- confirm this is acceptable.
hdr "MUTATE 1/2: delete old subnets (ranges first), collision-safe"
for plane in "${ORDER[@]}"; do
  old_sub="${M_OLDSUB[$plane]}"
  # Word-splitting is intended here: ipr_ids_on prints one id per line.
  for range_id in $(ipr_ids_on "$old_sub"); do
    emit "delete iprange $range_id (on $plane old subnet $old_sub)" iprange delete "$range_id" || true
  done
  emit "delete subnet id=$old_sub ($plane old CIDR)" subnet delete "$old_sub" || true
done

# No creates are attempted unless every delete succeeded.
if [ "$FATAL" -ne 0 ]; then
  echo
  echo "delete phase hit $FATAL failure(s) -- STOP (likely live interface links; clear them and re-run). No new subnets created."
  exit 1
fi

# Phase 2: recreate each plane's subnet at its new CIDR on the fabric/VLAN captured by
# the audit. By this point every old subnet is already deleted, so this loop must never
# die half-way: a failure on one plane is recorded and the remaining planes are still
# attempted. (A bare `emit ...` returning 1 would terminate the script under set -e,
# skipping the other creates and the summary below.)
hdr "MUTATE 2/2: create new subnets on the existing fabric/VLAN"
NOT_CREATED=()
for n in "${ORDER[@]}"; do
  fab="${M_FAB[$n]}"; vid="${M_VID[$n]}"; ncidr="${M_NEW[$n]}"
  # A failed lookup is treated like "not found" instead of aborting via errexit.
  vobj="$(vlanobj "$fab" "$vid")" || vobj=""
  if [ -z "$vobj" ]; then
    fail "$n: cannot resolve VLAN obj for fabric $fab vid $vid -- the VLAN should persist after subnet delete; aborting before create"
    NOT_CREATED+=("$n  $ncidr on fabric $fab vid $vid")
    continue
  fi
  # emit already records the failure through fail(); the guard only keeps the loop alive.
  emit "create subnet $ncidr on fabric $fab vid $vid (vlan obj $vobj)" subnets create cidr="$ncidr" vlan="$vobj" \
    || NOT_CREATED+=("$n  $ncidr on fabric $fab vid $vid")
done

if [ "$FATAL" -ne 0 ]; then
  echo
  echo "completed with $FATAL failure(s)"
  # The audit SKIPs planes with no subnet at their old CIDR, so a re-run will not retry
  # these creates; tell the operator exactly what is missing.
  echo "  NOT created (old subnet already deleted; a re-run will SKIP these -- create by hand):"
  for missing in ${NOT_CREATED[@]+"${NOT_CREATED[@]}"}; do
    echo "       - $missing"
  done
  exit 1
fi
hdr "next"
echo "  run: scripts/phase-00-maas-standup.sh --apply   (provider-vip + gateways + dns + reserves)"
echo "  then: scripts/phase-00-maas-standup.sh           (verify: all-SKIP, no drift)"
echo; echo "OK ($MODE)"