Newer
Older
openstack-caracal-ipv4 / scripts / maas-fabric-prune.sh
#!/usr/bin/env bash
# maas-fabric-prune.sh -- safely delete orphaned MAAS auto-fabrics (fabric-NN).
#
# WHY THIS EXISTS: auto-fabrics matching ^fabric-[0-9]+$ are minted at COMMISSIONING
# (one per non-boot NIC MAAS cannot map to a known fabric) and are NOT reclaimed when
# a machine is decomposed -- so they accumulate on every teardown/rebuild cycle. This
# is recurring maintenance, not a one-off.
#
# WHAT IT TOUCHES: ONLY auto-fabrics with ZERO subnets AND ZERO interfaces. It never
# deletes a named/renamed fabric, the default, or an auto-fabric that still carries a
# subnet (e.g. an LXD/substrate bridge: 10.37.x.0/24 + fd42::/64) or an interface.
#
# WHEN TO RUN: AFTER the interface carve (scripts/carve-host-interfaces.sh --apply on
# all hosts) has relocated host NICs onto the named fabrics. If run before that, this
# cycle's fabric-NN fabrics each still hold a NIC and are correctly reported as WAIT
# (skipped) rather than deleted.
#
# Dry-run by DEFAULT. Pass --apply to delete. Idempotent; safe to re-run.
set -euo pipefail

# Resolve the directory holding this script so the sibling classifier is found
# regardless of the caller's working directory.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CLASSIFY="$HERE/maas_fabric_classify.py"
# MAAS CLI profile to operate on; defaults to "admin", overridable via MAAS_PROFILE.
PROFILE="${MAAS_PROFILE:-admin}"

# Mode selection: dry-run unless --apply is given as the ONLY argument.
# Trailing arguments are rejected rather than ignored, so a contradictory call
# such as "--apply --dry-run" cannot silently run in apply mode.
# Usage errors are diagnostics: they go to stderr and exit with status 2.
MODE="dryrun"
if [ "$#" -gt 1 ]; then
  echo "usage: $0 [--apply]" >&2
  exit 2
fi
case "${1:-}" in
  --apply) MODE="apply" ;;
  ""|--dry-run|--dryrun) MODE="dryrun" ;;
  *) echo "usage: $0 [--apply]" >&2; exit 2 ;;
esac

# Preflight: every external tool the script shells out to must be on PATH, and the
# classifier script must exist next to this file. Failures are diagnostics, so they
# are written to stderr (stdout is reserved for the audit report) and exit 1.
command -v maas    >/dev/null 2>&1 || { echo "FATAL: maas CLI not found" >&2; exit 1; }
command -v jq      >/dev/null 2>&1 || { echo "FATAL: jq not found" >&2; exit 1; }
command -v python3 >/dev/null 2>&1 || { echo "FATAL: python3 not found" >&2; exit 1; }
[ -f "$CLASSIFY" ] || { echo "FATAL: classifier missing: $CLASSIFY" >&2; exit 1; }

# Scratch directory for the JSON snapshots; removed on any exit path.
TMP="$(mktemp -d)"
trap 'rm -rf "$TMP"' EXIT

# Take a read-only snapshot of each MAAS collection the classifier consumes,
# one file per collection, and remember the paths in the order they were fetched.
snapshot_files=()
for collection in fabrics subnets machines; do
  maas "$PROFILE" "$collection" read > "$TMP/$collection.json"
  snapshot_files+=("$TMP/$collection.json")
done

# The classifier receives fabrics, subnets, machines (in that order); its stdout
# is captured for the audit/delete steps that follow.
RESULT="$(python3 "$CLASSIFY" "${snapshot_files[@]}")"

# Human-readable audit table: one row per entry of .audit in the classifier output,
# ordered numerically by fabric id (sort splits on '=', so field 2 begins with the
# id digits).
echo "=== fabric audit (mode=$MODE) ==="
jq -r '.audit[]
  | "  id=\(.id)\tsubnets=\(.subnets)\tifaces=\(.ifaces)\t\(.name)\t\(.verdict)"' <<<"$RESULT" \
  | sort -t= -k2 -n

# Extract the delete list with a plain assignment instead of inside <( ... ).
# A command running in a process substitution has its exit status discarded, so a
# jq failure there (e.g. .delete_ids missing or null) would leave DEL empty and the
# run would report "nothing to delete" with exit 0. As an assignment, the failure
# is the statement's exit status and errexit aborts the script with jq's own error.
DELETE_IDS="$(jq -r '.delete_ids[]' <<<"$RESULT")"

# mapfile on an empty here-string would yield one empty element, so only split
# when jq actually printed something.
DEL=()
if [ -n "$DELETE_IDS" ]; then
  mapfile -t DEL <<<"$DELETE_IDS"
fi

printf '\n'

# Nothing flagged for deletion: report and stop successfully.
if (( ${#DEL[@]} == 0 )); then
  printf '%s\n' 'No orphaned auto-fabrics to delete.'
  exit 0
fi
printf 'Orphans (auto-fabric, 0 subnets, 0 ifaces) -> %s: %s\n' "${#DEL[@]}" "${DEL[*]}"

# Any mode other than "apply" is a dry run: list the orphans above, delete nothing.
if [[ "$MODE" != "apply" ]]; then
  printf '\n%s\n' 'DRY-RUN -- nothing deleted. Re-run with --apply to delete the orphans above.'
  exit 0
fi

printf '\n'

# Delete each listed fabric in turn. A failed delete does not stop the loop; it is
# recorded so the run ends with a non-zero status after every id has been tried.
any_failed=0
for fabric_id in "${DEL[@]}"; do
  printf '  deleting fabric id=%s ...\n' "$fabric_id"
  # MAAS itself refuses to delete a fabric with attached subnets/VLANs/interfaces;
  # that error is deliberately NOT suppressed -- the fabric is left intact and flagged.
  # stdin is detached so the CLI cannot consume or wait on the caller's input.
  if ! maas "$PROFILE" fabric delete "$fabric_id" </dev/null; then
    printf '  FATAL: fabric %s delete REFUSED/failed -- left intact (investigate)\n' "$fabric_id"
    any_failed=1
    continue
  fi
  printf '  PASS: fabric %s deleted\n' "$fabric_id"
done

printf '\n'
if (( any_failed )); then
  printf '%s\n' 'Summary: one or more deletes failed -- see FATAL lines above'
  exit 1
fi
printf 'Summary: %s orphaned fabric(s) deleted, 0 failures\n' "${#DEL[@]}"