NOTE(review): this patch text appears damaged in extraction -- the span between
"re-login or append" and "&2" (tail of juju-spaces-check.sh, head of the
scripts/lib-net.sh diff) is missing and is reproduced below exactly as found.
diff --git a/scripts/juju-spaces-check.sh b/scripts/juju-spaces-check.sh
new file mode 100644
index 0000000..a820f4d
--- /dev/null
+++ b/scripts/juju-spaces-check.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# scripts/juju-spaces-check.sh [MODEL]
+#
+# Read-only juju identity + per-model SPACE assertion. Spaces are per-model in Juju,
+# so this runs AFTER 'juju add-model' and before 'juju deploy'. Default MODEL=openstack.
+# Asserts the six D-052/D-053 spaces present and the five stale names absent.
+#
+# Pinned values come from scripts/lib-net.sh (single source of truth).
+# Exit codes: 0 pass | 1 fatal (missing/stale spaces, juju error, or jq missing) | 2 model not present yet
+
+set -euo pipefail
+shopt -s inherit_errexit 2>/dev/null || true
+IFS=$'\n\t'
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=scripts/lib-net.sh
+. "$SCRIPT_DIR/lib-net.sh"
+
+FATAL=0
+fail() { echo "FAIL: $*" >&2; FATAL=$((FATAL+1)); }
+pass() { echo "PASS: $*"; }
+note() { echo "NOTE: $*"; }
+
+MODEL="${1:-openstack}"
+need_jq || exit 1  # missing jq is fatal; exit 2 is reserved for "model not present yet"
+
+# whoami first, DIRECTLY (not in a command substitution) so an auth/macaroon
+# password prompt can reach the terminal before any captured juju call.
+echo "=== juju identity ==="
+juju whoami || true
+echo
+echo "=== models ==="
+juju models 2>&1 | sed 's/^/ /' || true
+echo
+
+# Is MODEL present? Strip any owner/ prefix; match the bare model name LITERALLY (-F, not a regex).
+# A juju failure is fatal (exit 1), not "model absent"; grep runs without -q so it drains the pipe under pipefail.
+MODELS_JSON="$(juju models --format json 2>/dev/null)" || { fail "juju models failed (auth/controller unreachable?)"; exit 1; }
+if ! jq -r '.models[]?.name' <<<"$MODELS_JSON" 2>/dev/null | sed 's#.*/##' | grep -xF -- "$MODEL" >/dev/null; then
+  note "model '$MODEL' not present yet -- run 'juju add-model $MODEL' first (spaces are per-model)"
+  echo
+  echo "Summary: model-absent (run add-model, then re-run this check)"
+  exit 2
+fi
+pass "model '$MODEL' present"
+
+echo
+echo "=== spaces in model '$MODEL' (want six; none of the five stale) ==="
+SPJSON="$(juju spaces -m "$MODEL" --format json)" || { fail "juju spaces failed (auth/macaroon? re-login or append &2
+  exit 2
+fi
+
+# --- The six MAAS spaces / planes (D-052 / D-053). ---
+PLANE_CIDRS=( "10.12.4.0/22" "10.12.8.0/22" "10.12.12.0/22" "10.12.16.0/22" "10.12.32.0/22" "10.12.36.0/22" )
+declare -A PLANE_NAME=(
+  ["10.12.4.0/22"]="provider-public"
+  ["10.12.8.0/22"]="metal-admin"
+  ["10.12.12.0/22"]="metal-internal"
+  ["10.12.16.0/22"]="data-tenant"
+  ["10.12.32.0/22"]="storage"
+  ["10.12.36.0/22"]="replication"
+)
+SPACES6=( provider-public metal-admin metal-internal data-tenant storage replication )
+
+# Names that MUST be gone after the D-052 / D-053 cutover (deploy fails or mis-binds if any reappear).
+STALE_SPACES=( provider metal data fabric-data lbaas )
+
+# Gateways: only provider-public and metal-admin route; the other four are gw=none.
+declare -A PLANE_GW=( ["10.12.4.0/22"]="10.12.4.1" ["10.12.8.0/22"]="10.12.8.1" )
+
+# The four non-API, non-PXE planes whose host NICs MAAS must have provisioned.
+DATA_PLANE_CIDRS=( "10.12.12.0/22" "10.12.16.0/22" "10.12.32.0/22" "10.12.36.0/22" )
+
+# metal-internal is a TAGGED VLAN bridged on the metal fabric; host links land on br-internal.
+METAL_INTERNAL_CIDR="10.12.12.0/22"
+METAL_INTERNAL_VID="103"
+METAL_INTERNAL_IFACE="br-internal"
+
+# The four KVM hosts: system_id -> hostname -> last host octet (.40-.43).
+SYSIDS=( 4na83t qdbqd6 h8frng tmsafc )
+declare -A SYSID_HOST=( [4na83t]=openstack0 [qdbqd6]=openstack1 [h8frng]=openstack2 [tmsafc]=openstack3 )
+declare -A SYSID_OCTET=( [4na83t]=40 [qdbqd6]=41 [h8frng]=42 [tmsafc]=43 )
+
+# Triple HA VIPs (D-020 + D-052): each API charm carries provider/admin/internal columns,
+# matching last octet, in the .50-.60 band. 11 clustered API charms.
+VIP_PREFIX_PROVIDER="10.12.4"
+VIP_PREFIX_ADMIN="10.12.8"
+VIP_PREFIX_INTERNAL="10.12.12"
+VIP_OCTET_MIN=50
+VIP_OCTET_MAX=60
+VIP_COUNT_EXPECT=11
+
+# --- tiny read-only helpers ---
+
+# need_jq: jq is required (present on the jumphost). Returns non-zero if absent.
+need_jq() {
+  command -v jq >/dev/null 2>&1 || { echo "FAIL: jq not found on PATH (jumphost should have it)" >&2; return 1; }
+}
+
+# fourth_octet IP: print the last dotted octet of an IPv4 address (text after the final '.').
+fourth_octet() { local ip="$1"; printf '%s\n' "${ip##*.}"; }
diff --git a/scripts/osd-blank-check.sh b/scripts/osd-blank-check.sh
new file mode 100644
index 0000000..74db03e
--- /dev/null
+++ b/scripts/osd-blank-check.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# scripts/osd-blank-check.sh
+#
+# Read-only OSD secondary-disk blank check on the libvirt host (needs sudo for qemu-img).
+# Confirms each host VM's OSD disk is wiped blank before redeploy so Ceph re-bootstraps clean.
+# Override the image dir with IMGDIR=... if needed.
+#
+# Pinned host list comes from scripts/lib-net.sh.
+# Exit codes: 0 all blank | 1 a disk looks non-blank (or qemu-img could not read it) | 2 a disk image missing
+
+set -euo pipefail
+shopt -s inherit_errexit 2>/dev/null || true
+IFS=$'\n\t'
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=scripts/lib-net.sh
+. "$SCRIPT_DIR/lib-net.sh"
+
+IMGDIR="${IMGDIR:-/var/lib/libvirt/images}"
+RC=0
+for sid in "${SYSIDS[@]}"; do
+  h="${SYSID_HOST[$sid]}"
+  img="$IMGDIR/${h}-1.qcow2"
+  echo "== $h ($img) =="
+  if [ ! -f "$img" ]; then echo " FAIL: image not found"; RC=2; continue; fi
+  if ! info="$(sudo qemu-img info "$img")"; then echo " FAIL: qemu-img info failed"; if [ "$RC" -eq 0 ]; then RC=1; fi; continue; fi  # report and keep checking the other hosts instead of aborting under set -e
+  sed -n -E '/virtual size|disk size/s/^/ /p' <<<"$info"  # sed exits 0 on no match; 'grep | sed' would abort the script under pipefail + set -e
+  dsize="$(awk -F': ' '/disk size/{print $2; exit}' <<<"$info")"
+  case "$dsize" in
+    *KiB*|*bytes*) : ;;
+    *) echo " WARN: disk size '$dsize' looks non-blank (expected KiB)"; if [ "$RC" -eq 0 ]; then RC=1; fi ;;
+  esac
+done
+echo
+echo "expect: virtual 512 GiB, disk ~200 KiB (blank). RC=$RC"
+exit "$RC"
diff --git a/scripts/pre-flight-checks.sh b/scripts/pre-flight-checks.sh
index 84eefdc..3a80942 100644
--- a/scripts/pre-flight-checks.sh
+++ b/scripts/pre-flight-checks.sh
@@ -1,51 +1,158 @@
 #!/usr/bin/env bash
 # scripts/pre-flight-checks.sh
 #
-# STATUS: PLACEHOLDER — drafted alongside deploy runbook.
+# Pre-deploy sanity gate. Read-only; no state changes. Run from inside the repo
+# (or set REPO=) BEFORE 'juju add-model' / 'juju deploy'. Surfaces issues that
+# would cause the deploy to fail or mis-bind during settle.
 #
-# Pre-deploy sanity check. Reads-only; no state changes. Run before
-# `juju deploy` to surface issues that would cause the deploy to fail
-# during settle.
+# Covers:
+# - repo HEAD / cleanliness (informational)
+# - octavia-pki overlay sanity (5 keys + ASCII; no key material printed)
+# - triple-VIP validator (provider/admin/internal columns, aligned, .50-.60)
+# - MAAS six-plane layout resolved BY CIDR (id/vid/gw/dns; metal-internal VID 103)
+# - per-host data/storage NIC links resolved BY CIDR (metal-internal on br-internal)
+# - the four KVM nodes Ready / power state
 #
-# Exit codes:
-# 0 all checks pass
-# 1 fatal — do not deploy
-# 2 warning — review then decide
+# NOT covered here (by design):
+# - juju per-model SPACE names -> scripts/juju-spaces-check.sh (runs AFTER add-model)
+# - YAML / deploy-plan validity -> 'juju deploy --dry-run' (phase-01 Step 1.2)
+# - OSD secondary-disk blank -> scripts/osd-blank-check.sh (needs sudo)
+#
+# Pinned values come from scripts/lib-net.sh (single source of truth).
+# Exit codes: 0 all checks pass | 1 fatal (do NOT deploy) | 2 warning (review then decide)
 
 set -euo pipefail
-
-# Strict mode hardening
 shopt -s inherit_errexit 2>/dev/null || true
 IFS=$'\n\t'
 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=scripts/lib-net.sh
+. "$SCRIPT_DIR/lib-net.sh"
+
 FATAL=0
 WARN=0
+fail() { echo "FAIL: $*" >&2; FATAL=$((FATAL+1)); }
+warn() { echo "WARN: $*" >&2; WARN=$((WARN+1)); }
+pass() { echo "PASS: $*"; }
+note() { echo "NOTE: $*"; }
+hdr() { echo; echo "=== $* ==="; }
 
-fail() { echo "FAIL: $*" >&2; FATAL=$((FATAL+1)); }
-warn() { echo "WARN: $*" >&2; WARN=$((WARN+1)); }
-pass() { echo "PASS: $*"; }
-note() { echo "NOTE: $*"; }
+finish() {
+  echo
+  echo "Summary: ${FATAL} fatal, ${WARN} warning"
+  if [ "$FATAL" -gt 0 ]; then exit 1
+  elif [ "$WARN" -gt 0 ]; then exit 2
+  fi
+  exit 0
+}
 
-# TODO during drafting:
-# - Juju controller reachable
-# - MAAS API reachable; machines in expected state
-# - NetBox reachable; VR0 DC0 prefixes/VLANs present (use --verify-only on imports)
-# - jumphost /etc/hosts contains all expected API VIP hostnames
-# - All KVM VMs (openstack0-3) reachable and Ready in MAAS
-# - capi-mgmt.maas k3s healthy
-# - Vault unseal keys present and readable
-# - Disk space on /var/lib/libvirt/images sufficient for snapshots
-# - bundle.yaml parses as valid YAML
-# - overlay parses as valid YAML
-# - Channel pins in bundle resolvable on Charmhub
+MAAS_PROFILE="${MAAS_PROFILE:-admin}"
+REPO="${REPO:-$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel 2>/dev/null || dirname "$SCRIPT_DIR")}"
+cd "$REPO" || { fail "cannot cd to REPO=$REPO"; finish; }
 
-note "Placeholder pre-flight script — not yet implemented."
+# ---------------------------------------------------------------------------
+hdr "Repo (informational)"
+note "REPO=$REPO"
+note "HEAD: $(git --no-pager log --oneline -1 2>/dev/null || echo '(not a git tree)')"
+DIRTY="$( { git status --porcelain 2>/dev/null || true; } | wc -l | tr -d ' ')"  # 0 when clean or when not a git tree
+if [ "$DIRTY" = "0" ]; then note "working tree clean"; else warn "working tree has $DIRTY modified file(s)"; fi
 
-echo
-echo "Summary: ${FATAL} fatal, ${WARN} warning"
-if [[ $FATAL -gt 0 ]]; then
-  exit 1
-elif [[ $WARN -gt 0 ]]; then
-  exit 2
+# ---------------------------------------------------------------------------
+hdr "CHECK 0: octavia-pki overlay (no key material printed)"
+OVL="overlays/octavia-pki.yaml"
+if [ -f "$OVL" ]; then
+  KEYS="$(grep -cE 'lb-mgmt-' "$OVL" || true)"  # grep -c prints 0 but exits 1 on no match; '|| true' keeps set -e quiet
+  if [ "$KEYS" -eq 5 ]; then pass "overlay present with 5 lb-mgmt-* keys"; else fail "overlay has $KEYS lb-mgmt-* keys (want 5)"; fi
+  if LC_ALL=C grep -qP '[^\x00-\x7F]' "$OVL"; then fail "overlay contains non-ASCII bytes"; else pass "overlay ASCII clean"; fi
+else
+  fail "MISSING $OVL (gitignored secret; place it or regenerate via runbook Step 1.0-GEN before deploy)"
 fi
-exit 0
+
+# ---------------------------------------------------------------------------
+hdr "CHECK 1: bundle VIPs -- TRIPLE column .${VIP_OCTET_MIN}-.${VIP_OCTET_MAX} (provider/admin/internal)"
+if [ ! -f bundle.yaml ]; then
+  fail "bundle.yaml not found in $REPO"
+else
+  VIPLINES="$(grep -cE '^[[:space:]]+vip:' bundle.yaml || true)"
+  if [ "$VIPLINES" -eq "$VIP_COUNT_EXPECT" ]; then pass "vip: line count = $VIPLINES"; else warn "vip: line count = $VIPLINES (want $VIP_COUNT_EXPECT)"; fi
+  VIPOUT="$(awk -v pp="$VIP_PREFIX_PROVIDER" -v pa="$VIP_PREFIX_ADMIN" -v pi="$VIP_PREFIX_INTERNAL" \
+    -v lo="$VIP_OCTET_MIN" -v hi="$VIP_OCTET_MAX" '
+    BEGIN{ok=0;bad=0}
+    /^[[:space:]]+vip:/{
+      v=$0; sub(/^[^"]*"/,"",v); sub(/".*/,"",v); n=split(v,a," ");
+      if(n!=3){print " MALFORMED(not 3 IPs): " $0; bad++; next}
+      if(index(a[1],pp".")!=1){print " WRONG provider col: " $0; bad++; next}
+      if(index(a[2],pa".")!=1){print " WRONG admin col: " $0; bad++; next}
+      if(index(a[3],pi".")!=1){print " WRONG internal col: " $0; bad++; next}
+      split(a[1],x,"."); split(a[2],y,"."); split(a[3],z,".");
+      if(x[4]!=y[4]||y[4]!=z[4]){print " UNALIGNED last octet: " $0; bad++; next}
+      if(x[4]+0<lo||x[4]+0>hi){print " OUT-OF-RANGE octet: " $0; bad++; next}  # numeric compare against the lo..hi band
+      ok++
+    }
+    END{print "RESULT " ok " " bad}
+  ' bundle.yaml)"
+  echo "$VIPOUT" | grep -v '^RESULT ' || true
+  VOK="$(echo "$VIPOUT" | awk '/^RESULT /{print $2}')"
+  VBAD="$(echo "$VIPOUT" | awk '/^RESULT /{print $3}')"
+  if [ "${VOK:-0}" -eq "$VIP_COUNT_EXPECT" ] && [ "${VBAD:-1}" -eq 0 ]; then
+    pass "aligned triple VIPs OK=$VOK bad=$VBAD"
+  else
+    fail "VIP validation OK=${VOK:-0} bad=${VBAD:-?} (want OK=$VIP_COUNT_EXPECT bad=0)"
+  fi
+fi
+
+# ---------------------------------------------------------------------------
+hdr "MAAS reachability gate (read-only)"
+need_jq || { fail "jq missing -- MAAS checks not run"; finish; }  # count it as fatal so finish cannot exit 0
+SUBJSON="$(maas "$MAAS_PROFILE" subnets read 2>/dev/null)" || { fail "MAAS unreachable: 'maas $MAAS_PROFILE subnets read' failed"; finish; }
+pass "MAAS reachable (profile=$MAAS_PROFILE)"
+
+# ---------------------------------------------------------------------------
+hdr "CHECK 3: six planes resolved BY CIDR (id/vid/gw/dns)"
+present=0
+for c in "${PLANE_CIDRS[@]}"; do
+  row="$(printf '%s' "$SUBJSON" | jq -r --arg c "$c" '.[] | select(.cidr==$c) | "id=\(.id) vid=\(.vlan.vid // 0) gw=\(.gateway_ip // "none") dns=\(.dns_servers|tostring)"')"
+  if [ -z "$row" ]; then fail "plane ${PLANE_NAME[$c]} ($c) NOT FOUND"; continue; fi
+  present=$((present+1))
+  printf " %-15s %-16s %s\n" "${PLANE_NAME[$c]}" "$c" "$row"
+  gw="$(printf '%s' "$SUBJSON" | jq -r --arg c "$c" '.[]|select(.cidr==$c)|.gateway_ip // "none"')"
+  exp="${PLANE_GW[$c]:-none}"  # planes without a PLANE_GW entry must have no gateway
+  if [ "$gw" != "$exp" ]; then
+    if [ "$exp" = "none" ]; then warn "${PLANE_NAME[$c]} has gateway $gw (want none -- spurious-gw defect class, D-052)"; else fail "${PLANE_NAME[$c]} gateway=$gw (want $exp)"; fi
+  fi
+done
+if [ "$present" -eq 6 ]; then pass "all six planes present (by CIDR)"; else fail "only $present/6 planes present"; fi
+mivid="$(printf '%s' "$SUBJSON" | jq -r --arg c "$METAL_INTERNAL_CIDR" '.[]|select(.cidr==$c)|.vlan.vid // 0')"
+if [ "$mivid" = "$METAL_INTERNAL_VID" ]; then pass "metal-internal is VID $METAL_INTERNAL_VID"; else fail "metal-internal VID=$mivid (want $METAL_INTERNAL_VID)"; fi
+note "stale-NAME check is juju-side (run scripts/juju-spaces-check.sh after add-model)"
+
+# ---------------------------------------------------------------------------
+hdr "CHECK 2: data/storage NIC links BY CIDR (per host; expect octet .40-.43)"
+for sid in "${SYSIDS[@]}"; do
+  echo " == $sid (${SYSID_HOST[$sid]}, octet .${SYSID_OCTET[$sid]}) =="
+  IFJSON="$(maas "$MAAS_PROFILE" interfaces read "$sid" 2>/dev/null)" || { fail "cannot read interfaces for $sid"; continue; }
+  for c in "${DATA_PLANE_CIDRS[@]}"; do
+    line="$(printf '%s' "$IFJSON" | jq -r --arg c "$c" '.[] as $if | $if.links[]? | select(.subnet.cidr==$c) | "\($if.name) \(.ip_address // "(no-ip)")"' | head -1)"
+    if [ -z "$line" ]; then fail "$sid missing link on ${PLANE_NAME[$c]} ($c)"; continue; fi
+    ifname="${line%% *}"; ip="${line##* }"  # line is "<ifname> <ip>"
+    printf " %-14s -> %-16s %s\n" "$ifname" "$c" "$ip"
+    if [ "$c" = "$METAL_INTERNAL_CIDR" ] && [ "$ifname" != "$METAL_INTERNAL_IFACE" ]; then
+      fail "$sid metal-internal on '$ifname' (want $METAL_INTERNAL_IFACE)"
+    fi
+    o="$(fourth_octet "$ip")"
+    if [ "$o" != "${SYSID_OCTET[$sid]}" ]; then warn "$sid ${PLANE_NAME[$c]} ip=$ip (want last octet .${SYSID_OCTET[$sid]})"; fi
+  done
+done
+
+# ---------------------------------------------------------------------------
+hdr "CHECK 4: the four KVM hosts -- status / power"
+MJSON="$(maas "$MAAS_PROFILE" machines read 2>/dev/null)" || { fail "cannot read machines"; finish; }
+for sid in "${SYSIDS[@]}"; do
+  row="$(printf '%s' "$MJSON" | jq -r --arg s "$sid" '.[]|select(.system_id==$s)|"\(.hostname) \(.status_name) power=\(.power_state)"')"
+  if [ -z "$row" ]; then fail "host $sid not found in MAAS"; continue; fi
+  echo " $row"
+  st="$(printf '%s' "$MJSON" | jq -r --arg s "$sid" '.[]|select(.system_id==$s)|.status_name')"
+  if [ "$st" = "Ready" ]; then pass "$sid Ready"; else fail "$sid status=$st (want Ready)"; fi
+done
+
+finish