diff --git a/scripts/deploy-watch.sh b/scripts/deploy-watch.sh
new file mode 100755
--- /dev/null
+++ b/scripts/deploy-watch.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+# scripts/deploy-watch.sh [MODEL] [INTERVAL]
+#
+# Compact "signal" watch for a deploy/settle arc: machine + unit state COUNTS plus a
+# named list of principal units in error/blocked. Does NOT descend into subordinates;
+# does NOT replace the phase gates. Read-only. Companion to the "detail" window
+# (`juju status -m MODEL --watch 5s`).
+#
+# Usage: scripts/deploy-watch.sh openstack 15
+#   MODEL     juju model to watch (default: openstack)
+#   INTERVAL  pause between frames, handed to sleep(1): a non-negative number with an
+#             optional s/m/h/d suffix (default: 15)
+# Stop with Ctrl-C. Tolerant of transient juju-status failures during settle.
+#
+# Health-at-a-glance: the error/blocked section stays EMPTY until the expected late
+# blocks (vault "needs init", octavia "awaiting configure-resources").
+#
+# Exit codes: 2 if jq is missing or INTERVAL is malformed; otherwise runs until
+# interrupted.
+
+# errexit is deliberately not enabled: a failing command inside the loop must not end
+# the watch.
+set -uo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=scripts/lib-net.sh
+. "$SCRIPT_DIR/lib-net.sh"
+# need_jq is expected to come from lib-net.sh.
+need_jq || exit 2
+
+MODEL="${1:-openstack}"
+INTERVAL="${2:-15}"
+
+# Fail fast on an interval sleep(1) would reject: a sleep that errors out returns at
+# once, which would turn the loop below into a busy loop polling the controller.
+interval_re='^([0-9]+|[0-9]*[.][0-9]+)[smhd]?$'
+if [[ ! "$INTERVAL" =~ $interval_re ]]; then
+  printf 'deploy-watch: invalid INTERVAL "%s" (expected e.g. 15, 2.5 or 1m)\n' "$INTERVAL" >&2
+  exit 2
+fi
+
+# Collapse newline-separated states on stdin into "state=count " pairs on one line.
+tally() {
+  sort | uniq -c | awk '{printf "%s=%s ",$2,$1}'
+}
+
+while :; do
+  ts="$(date '+%H:%M:%S')"
+  status_json="$(juju status -m "$MODEL" --format json 2>/dev/null)" || status_json=""
+
+  # Anything that is not a JSON object (empty, truncated, stray text) counts as a failed
+  # poll; otherwise the queries below would find nothing and the frame would read all-clear.
+  if [[ -z "$status_json" ]] || ! jq -e 'type == "object"' <<<"$status_json" >/dev/null 2>&1; then
+    printf '[%s] juju status unavailable (transient?) -- retry in %ss\n' "$ts" "$INTERVAL"
+    sleep "$INTERVAL"
+    continue
+  fi
+
+  # juju-status of every machine, counted per state.
+  mach="$(jq -r '.machines // {} | to_entries[] | .value."juju-status".current' <<<"$status_json" | tally)"
+  # workload-status of every unit listed directly under an application, counted per state.
+  unit="$(jq -r '.applications // {} | to_entries[] | .value.units // {} | to_entries[]
+    | .value."workload-status".current' <<<"$status_json" | tally)"
+  # One line per unit whose workload is blocked/error or whose juju-status is error.
+  prob="$(jq -r '
+    .applications // {} | to_entries[] | .value.units // {} | to_entries[]
+    | select((.value."workload-status".current=="blocked")
+          or (.value."workload-status".current=="error")
+          or (.value."juju-status".current=="error"))
+    | " \(.key) [\(.value."workload-status".current)] \(.value."workload-status".message // "")"' <<<"$status_json")"
+
+  printf '================ [%s] model=%s ================\n' "$ts" "$MODEL"
+  printf 'machines: %s\n' "${mach:-(none)}"
+  printf 'units: %s\n' "${unit:-(none)}"
+  if [[ -n "$prob" ]]; then
+    printf 'error/blocked principals:\n%s\n' "$prob"
+  else
+    printf 'error/blocked principals: (none)\n'
+  fi
+  sleep "$INTERVAL"
+done