diff --git a/.claude/hooks/guard-destructive.py b/.claude/hooks/guard-destructive.py
new file mode 100644
index 0000000..4b66798
--- /dev/null
+++ b/.claude/hooks/guard-destructive.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+"""
+.claude/hooks/guard-destructive.py -- PreToolUse belt-and-suspenders for the
+jumphost (2026-07-03). Settings deny/ask rules are the first line; this hook
+exists because (a) a hook exit-2 blocks BEFORE permission evaluation in every
+permission mode, and (b) Bash settings-rule enforcement has a documented
+reliability history upstream. Blocks the NEVER class and secret-file shell
+reads that Read() rules cannot see (arbitrary subprocess reads).
+
+stdin: PreToolUse JSON. exit 0 = no opinion (permission rules proceed);
+exit 2 = hard block (stderr shown to Claude). ASCII + LF.
+Offline test: tests/claude-guard/run-tests.sh.
+"""
+import json
+import re
+import sys
+
+NEVER = [  # (regex, reason) rules searched against the raw Bash command; first match blocks
+    (r"vault\s+operator\s+(init|rekey|generate-root)",
+     "one-shot vault operation: operator-only, from the runbook, VERBATIM (DOCFIX-006/D-069)"),
+    (r"juju\s+destroy-controller",
+     "controller destruction is out of scope for any session on this host"),
+    (r"\bmaas\s+list\b",
+     "prints the MAAS API key (DOCFIX-016); use 'maas admin ...' directly"),
+    (r"git\s+push\b[^|;&]*\s(--force|-f)\b",  # flag may follow the remote/refspec too
+     "force-push is banned on this repo"),
+    (r"(cat|less|more|head|tail|cp|scp|base64|xxd|od|strings)\b[^|;&]*"
+     r"(vault-init/|as-executed/|-cred\.txt|appcred)",
+     "secret-adjacent file: never read key/cred material into context (whitelist-print rule)"),
+    (r"rm\s+(-\S+\s+)+(/|~)/?\*?(\s+-\S+)*\s*($|[|;&\n])",  # any flag form; / ~ /* ~/
+     "catastrophic rm"),
+]
+
+
+def main():
+    """PreToolUse entry: return 2 to hard-block a NEVER-class command, else 0."""
+    try:
+        cmd = json.load(sys.stdin)["tool_input"]["command"]
+    except Exception:
+        return 0  # malformed/unexpected payload: no opinion; permission rules still apply
+    # A non-string command cannot be matched; stay "no opinion" instead of crashing.
+    hits = [why for rx, why in NEVER if isinstance(cmd, str) and re.search(rx, cmd)]
+    if not hits:
+        return 0
+    sys.stderr.write("BLOCKED by .claude/hooks/guard-destructive.py: %s\n" % hits[0])
+    return 2
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # process exit status is main()'s verdict (0 or 2)
diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 0000000..e3b13d4
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,54 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(git status*)", "Bash(git diff*)", "Bash(git log*)", "Bash(git pull*)",
+      "Bash(git grep*)", "Bash(grep *)", "Bash(ls *)", "Bash(cat scripts/*)",
+      "Bash(cat runbooks/*)", "Bash(cat docs/*)", "Bash(jq *)",
+      "Bash(juju status*)", "Bash(juju models*)", "Bash(juju machines*)",
+      "Bash(juju spaces*)", "Bash(juju show-*)", "Bash(juju info *)",
+      "Bash(maas admin * read*)",
+      "Bash(openstack * list*)", "Bash(openstack * show*)",
+      "Bash(bash scripts/repo-lint.sh*)",
+      "Bash(bash scripts/run-tests-all.sh*)",
+      "Bash(bash scripts/cloud-assert.sh)",
+      "Bash(bash scripts/preflight.sh*)",
+      "Bash(python3 scripts/repo_lint.py*)",
+      "Bash(python3 scripts/provider-bundle-check.py*)",
+      "Bash(bash tests/*)"
+    ],
+    "ask": [
+      "Bash(juju destroy-model *)", "Bash(juju remove-machine *)",
+      "Bash(juju remove-application *)", "Bash(juju remove-unit *)",
+      "Bash(juju run *)", "Bash(juju ssh *)", "Bash(juju exec *)",
+      "Bash(juju config * *=*)", "Bash(juju attach-resource *)",
+      "Bash(juju deploy *)", "Bash(juju add-model *)",
+      "Bash(maas admin machine delete *)", "Bash(maas admin * update*)",
+      "Bash(maas admin * create*)", "Bash(maas admin * release*)",
+      "Bash(openstack * create*)", "Bash(openstack * delete*)",
+      "Bash(openstack * set*)", "Bash(openstack * unset*)",
+      "Bash(* --apply*)",
+      "Bash(git commit*)", "Bash(git push*)",
+      "Bash(sudo *)", "Bash(virsh *)", "Bash(rm *)"
+    ],
+    "deny": [
+      "Bash(vault operator init*)", "Bash(vault operator rekey*)",
+      "Bash(vault operator generate-root*)",
+      "Bash(juju destroy-controller *)",
+      "Bash(maas list*)",
+      "Bash(git push --force*)", "Bash(git push -f*)",
+      "Read(~/vault-init/**)", "Read(~/as-executed/**)",
+      "Read(~/tenant-*/**)", "Read(**/*-cred.txt)", "Read(**/*appcred*)",
+      "Edit(~/vault-init/**)"
+    ]
+  },
+  "hooks": {
+    "PreToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          { "type": "command", "command": "python3 \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/guard-destructive.py" }
+        ]
+      }
+    ]
+  }
+}
diff --git a/.claude/skills/openstack-cloud-ops/SKILL.md b/.claude/skills/openstack-cloud-ops/SKILL.md
new file mode 100644
index 0000000..c7c78ec
--- /dev/null
+++ b/.claude/skills/openstack-cloud-ops/SKILL.md
@@ -0,0 +1,123 @@
+---
+name: openstack-cloud-ops
+description: "Operate, install, extend, and troubleshoot the Omega Cloud - a commercial multi-tenant Charmed OpenStack (Caracal 2024.1) deployment managed with Juju and MAAS, with Vault TLS, OVN, Ceph, Octavia, and Magnum/CAPI tenant Kubernetes. Use this skill for ANY work touching OpenStack, Juju, MAAS, Magnum, CAPI, Ceph, OVN, Octavia, Keystone, Vault-for-OpenStack, tenant onboarding, or the openstack-caracal-ipv4 repository - including writing or reviewing bash/python operational scripts, debugging failed deploys or cluster creates, runbook work, design-decision (D-NNN) discussion, and incident triage. Use it even for seemingly simple OpenStack questions: this deployment has strict operating discipline and known charm traps that make generic answers wrong."
+---
+
+# openstack-cloud-ops
+
+Operating skill for the Omega Cloud: a commercial, multi-tenant, tenant
+self-administered OpenStack cloud. Current phase: single-DC virtual rehearsal
+("testcloud", VR0 DC0) on four KVM hosts, rehearsing a future bare-metal
+multi-datacenter deployment ("Roosevelt"). The governing design constraint is
+MINIMIZE DELTA TO ROOSEVELT: the runbooks and scripts are primary deliverables
+alongside the running cloud, so transferable answers beat quick fixes.
+
+## Step 0 - locate the source of truth
+
+The repository `openstack-caracal-ipv4` (GitBucket, git.baldurkeep.com) is
+authoritative for everything: bundle, runbooks, scripts, design decisions,
+as-built values. This skill is a discipline-and-routing layer OVER that repo,
+not a substitute for it.
+
+1. Look for a local clone (common paths: `~/openstack-caracal-ipv4`, a repo
+   dir in the working tree, `/home/claude/repo`). If found, `git log -1` to
+   note HEAD and work from it.
+2. No clone and you have shell + network: ask before cloning
+   (`https://git.baldurkeep.com/git/OpenStack/openstack-caracal-ipv4.git`).
+   The repo may be private; if the clone fails, ask the operator to provide
+   access or the relevant files.
+3. No clone obtainable (e.g. chat without sandbox network): say so, ask the
+   operator to paste the relevant runbook/script, and proceed only on what is
+   actually in front of you.
+
+**Divergence rule:** if this skill and repo HEAD disagree, the repo wins -
+but FLAG the divergence to the operator rather than silently following either.
+The repo is a living draft; this skill's invariants (discipline, hardening)
+change slowly, while its facts (IPs, versions, phase status) go stale fast.
+
+## Step 1 - detect the environment
+
+- **Live shell to the jumphost / infra** (Claude Code on `vopenstack-jesse` or
+  similar): you may RUN read-only audits directly. Every mutation remains
+  individually human-gated - present the command, state what it changes, wait
+  for approval. A live shell relaxes the transport, never the discipline.
+- **Chat / no infra shell**: operate the gated copy-paste model - prepare
+  labeled blocks, the operator runs them and pastes output back. Never assume
+  a block ran or succeeded; wait for the pasted evidence.
+
+Read `references/operating-discipline.md` before doing either.
+
+## The three hard operating rules (non-negotiable)
+
+1. **Execute only the current runbook step, exactly as written.** No added
+   scope, no adjacent improvements, no live re-architecture mid-step. Findings
+   and improvement ideas are LOGGED (changelog / D-NNN proposal), never
+   executed live mid-step.
+2. **Never use an inferred value.** No IP, ID, name, or scope goes into a
+   command unless it was measured this session or carried from confirmed
+   as-built. If a value would be inferred: stop and measure it. Never run a
+   destructive or session-altering command from memory without confirming it
+   is the minimal correct action for the current live state.
+3. **Prefer dynamic lookups over hardcoded literals.** Discover VIPs, project
+   names, IDs, and version sets at runtime. Where a literal is unavoidable it
+   is tagged and centralized (`scripts/lib-net.sh`, `lib-hosts.sh`), keyed by
+   stable identity (CIDR, hostname) - never by drifting IDs.
+
+Corollary that governs everything: **verify before mutate**. A read-only audit
+precedes every mutation; destructive and secret-handling steps are gated
+individually, never batched.
+
+## Routing - where to go for what
+
+| Task | Read first |
+|---|---|
+| Any command block, script, or paste block you are about to write | `references/script-authoring.md` |
+| Deploy / redeploy / teardown | repo `runbooks/README.md`, then the phase-NN runbook; conventions in `references/operating-discipline.md` |
+| Something is broken (triage, incidents) | `references/troubleshooting.md`, then repo `runbooks/appendix-A-troubleshooting.md` |
+| CAPI / Magnum / mgmt-VM recovery | repo `runbooks/ops-capi-recovery.md` |
+| Deliver ANY repo change (script, runbook, doc) | run `bash scripts/repo-lint.sh` + the touched script's `tests/<name>/run-tests.sh` BEFORE handing it over |
+| Pre-deploy gate (before add-model) | `bash scripts/preflight.sh` -- THE single entry; do not run the sub-gates piecemeal |
+| Is the cloud actually healthy? (post-deploy, post-restart, pre-change baseline, incident) | `bash scripts/cloud-assert.sh` (add `--capture` at deploy completion for the committed BOM) |
+| Full-cloud restart after outage/maintenance | repo `runbooks/ops-restart-procedure.md` |
+| Starting any consequential live session | `bash scripts/run-logged.sh <label>` first (as-executed log; docs/as-executed-log-convention.md) |
+| Credential exposures / security TODOs | repo `docs/security-ledger.md` -- add a row, never only a script comment |
+| Tenant onboarding / tenant self-service | repo `scripts/tenant-onboard.sh` + `runbooks/tenant-onboarding-v2-DRAFT.md` + `appendix-C/D` |
+| Network / plane / IPAM questions | `references/environment.md`, repo `scripts/lib-net.sh`, NetBox (the IPAM apex) |
+| ANY change request to a built surface | grep repo `docs/design-decisions.md` for the governing D-NNN FIRST - PROPOSED/OPEN means the operator has not ruled: present options, do not implement |
+| Why is it built this way? / proposing changes | repo `docs/design-decisions.md` (D-NNN); grep before assigning a new number |
+| Versions, channels, pins | repo `runbooks/appendix-B-asbuilt-version-lock.md` |
+| Environment facts (hosts, repo, planes) | `references/environment.md` |
+
+## Standard loops (repeatable session shapes)
+
+**Session bootstrap (jumphost):** `git -C ~/openstack-caracal-ipv4 pull` ->
+`bash scripts/repo-lint.sh` (0 fail expected) -> if touching the live cloud,
+`bash scripts/run-logged.sh <label>` to open the logged shell. Repo HEAD and a
+clean lint are the preconditions for everything else.
+
+**Change-delivery loop:** grep for prior art (zeroth decision) -> grep
+design-decisions for the governing D-NNN -> edit -> `bash scripts/repo-lint.sh`
+-> run the touched script's harness (create one if missing -- no script change
+ships without its harness) -> deliver as repo-relative ZIP + a changelog entry
+with a per-item revert. Under blanket approval, the changelog IS the review
+surface: every item states what, why (evidence), and how to revert.
+
+**Deploy loop:** phase-00 runbook (D-061 destroy path) -> `bash
+scripts/preflight.sh` PASS -> phase-01..08 gated -> `bash
+scripts/cloud-assert.sh --capture` -> commit the asbuilt/ BOM.
+
+**Incident loop:** capture the verbatim error -> `bash scripts/cloud-assert.sh`
+(the service-own-verdict sweep localizes the layer) -> appendix-A by exact
+message -> recorded fix, gated -> log the finding (new root causes become
+appendix-A/DOCFIX material).
+
+## Posture
+
+- This is a commercial multi-tenant cloud with HARD tenant isolation (SCS
+  Domain Manager persona). Treat tenant-visible surfaces and cross-domain
+  boundaries as security-relevant in every change.
+- The operator community here values debate and industry best practice over
+  quick fixes. Push back with sources when you disagree; own mistakes plainly
+  and concisely. Fabricated flags, values, or version numbers are the cardinal
+  sin - if you have not verified an option name or version, say so and verify.
+- Responses stay concise. Decisions get explicit rationale.
diff --git a/.claude/skills/openstack-cloud-ops/references/environment.md b/.claude/skills/openstack-cloud-ops/references/environment.md
new file mode 100644
index 0000000..53deeb8
--- /dev/null
+++ b/.claude/skills/openstack-cloud-ops/references/environment.md
@@ -0,0 +1,101 @@
+# Environment - Omega Cloud (VR0 DC0 testcloud)
+
+Facts here are ANCHORS, not command inputs. Anything marked (verify) must be
+re-measured or re-read from the repo/live cloud before use in a command -
+hard rule 2 applies. Snapshot date: 2026-07. The repo is fresher than this file.
+
+## The two deployments
+
+- **Testcloud (now):** VR0 DC0, four KVM host VMs (openstack0-3) on a single
+  hypervisor, managed by MAAS + Juju. Single-DC virtual rehearsal.
+- **Roosevelt (future):** bare-metal, multi-DC, commercial production
+  (3310 Roosevelt Blvd, Eugene OR). Dedicated node roles (gateway/controller/
+  compute split) - unlike the hyperconverged testcloud. Every design choice is
+  judged by its transfer to Roosevelt.
+
+## Stack (verify against appendix-B for pins)
+
+Charmed OpenStack Caracal 2024.1 - Juju 3.6, MAAS 3.7.2, Vault TLS (charm-pki
+root CA), OVN 24.03, Ceph Squid, Octavia (amphora), Barbican,
+mysql-innodb-cluster, RabbitMQ, Magnum + magnum-capi-helm driver + azimuth
+capi-helm-charts (kubeadm engine), in-cloud single-homed CAPI mgmt VM
+(capi-mgmt-v2, k8s-snap, D-035). NetBox is the IPAM apex: never hand-edit
+downstream MAAS or overlays for network values.
+
+## Control points
+
+- **Jumphost:** `vopenstack-jesse` - all live commands run here. Has juju,
+  the openstack CLI (SNAP - cannot read /tmp; use $HOME), jq, kubectl.
+- **Repo:** `https://git.baldurkeep.com/OpenStack/openstack-caracal-ipv4`
+  (web) / `.../git/OpenStack/openstack-caracal-ipv4.git` (clone). Operator
+  commits from Windows (PowerShell / GitHub Desktop - strips exec bits;
+  `.gitattributes` pins LF); the jumphost only pulls.
+- **Juju model:** `openstack`. **MAAS profile:** `admin` (call
+  `maas admin ...` directly; NEVER `maas list` - it prints the API key).
+- **Management substrate (verify; NEVER touch in teardown):** the MAAS
+  machines hosting juju, lxd, and tailscale are hard-excluded from teardown
+  scripts. Resolve system_ids live via `scripts/lib-hosts.sh` - system_ids
+  are re-minted on every re-enrollment (DOCFIX-040).
+
+## The six network planes (D-052 / D-053; verify against scripts/lib-net.sh)
+
+| Plane | CIDR | Carries | Notes |
+|---|---|---|---|
+| provider-public | 10.12.4.0/22 | Public API VIPs + tenant FIPs (Pattern A, D-060) | gw .4.1; untagged |
+| metal-admin | 10.12.8.0/22 | MAAS PXE, operator/admin endpoint, default binding | gw .8.1; DC-local |
+| metal-internal | 10.12.12.0/22 | ALL service-to-service control (internal API, DB, MQ, Vault, peers) | tagged VID 103 via br-internal; no gw |
+| data-tenant | 10.12.16.0/22 | Tenant Geneve overlay | no gw |
+| storage | 10.12.32.0/22 | Ceph public | no gw |
+| replication | 10.12.36.0/22 | Ceph cluster (OSD replication) | no gw |
+
+- API VIPs: triple per clustered charm (provider/admin/internal), matching
+  last octet in the .50-.60 band, 11 clustered charms (verify count live).
+- Tenant pool: 10.20.0.0/16 (hybrid model D-016 - pool in NetBox, per-project
+  /24s Neutron-managed). Avoid collisions with capi-mgmt (10.20.0.0/24) and
+  existing tenant /24s - list live before allocating.
+- Provider NIC rule (D-057/D-060): the provider uplink must land in OVS
+  `br-ex`, never enslaved to a Linux bridge, and `br-ex` carries no L3 config.
+
+## Repo map (what lives where)
+
+- `bundle.yaml` - the canonical bundle; VIPs/units baked in for testcloud.
+- `runbooks/phase-00..08-*.md` - the gated deploy sequence, in order, each
+  ending in a hard gate. `runbooks/README.md` has the label conventions.
+- `runbooks/appendix-A-troubleshooting.md` - symptom->cause->fix index keyed
+  by D-NNN/DOCFIX-NNN. First stop for any known-looking failure.
+- `runbooks/appendix-B` - version lock. `appendix-C` - identity/RBAC.
+  `appendix-D` - Magnum trust model. `ops-capi-recovery.md` - CAPI/Magnum
+  post-deploy operations.
+- `docs/design-decisions.md` - the D-NNN architectural record (append-only
+  discipline; superseded entries stay, marked).
+- `scripts/` - phase scripts + `lib-net.sh` / `lib-hosts.sh` (pinned values)
+  + tenant onboarding/acceptance. `tests/<script>/` - offline fakebin
+  regression harnesses.
+- `policies/domain-manager-policy.yaml` + `policies/overrides.zip` - the SCS
+  Domain Manager RBAC override (D-051/D-064); the zip ships IN the bundle
+  (keystone resources, DOCFIX-071) and provider-bundle-check drift-guards it.
+- Operational tooling (2026-07 hardening set): `scripts/preflight.sh` (single
+  pre-deploy gate: lint -> bundle invariants -> Charmhub channel assert -> live
+  MAAS pre-flight), `scripts/repo-lint.sh`/`repo_lint.py` (static hygiene,
+  L1-L6), `scripts/cloud-assert.sh` (behavioral verifier + `--capture` BOM to
+  `asbuilt/<date>/`), `scripts/run-logged.sh` (as-executed session logger),
+  `scripts/channel_assert.py`. `runbooks/ops-restart-procedure.md` (full-cloud
+  restart). `docs/security-ledger.md` (exposure/obligation rows).
+  `logs/as-executed-index.md` (committed index; log content stays jumphost-only).
+- No KVM snapshot restore path exists (D-070 superseded D-012):
+  rebuild-from-runbooks IS the restore path; baselines come from cloud-assert
+  `--capture`.
+
+## Identity / tenancy model (see appendix-C/D and D-051, D-064, D-066)
+
+Domain-per-client. Operator provisions: domain + a domain `manager` (SCS
+Domain Manager persona - the plain `admin` role is NOT domain-confinable) +
+quotas. The tenant self-services everything inside: projects, users, roles
+(only member + load-balancer_member assignable - never admin/manager),
+app credentials, networks, templates, clusters. Magnum mints per-cluster
+trust app-creds carrying the trustor's roles frozen at mint time (D-039:
+trustor needs load-balancer_member or CAPO 403s on Octavia). Cluster create
+must run as a password identity, not an app-cred (trust-creation block,
+D-066). Every identity command is DOMAIN-QUALIFIED (`--domain`,
+`--user-domain`, `--project-domain`) - scope-default resolution silently
+lands in the wrong domain and 404s misleadingly.
diff --git a/.claude/skills/openstack-cloud-ops/references/operating-discipline.md b/.claude/skills/openstack-cloud-ops/references/operating-discipline.md
new file mode 100644
index 0000000..e235e6d
--- /dev/null
+++ b/.claude/skills/openstack-cloud-ops/references/operating-discipline.md
@@ -0,0 +1,108 @@
+# Operating discipline
+
+How work is executed on this cloud. These conventions exist because the
+runbooks and scripts ARE the product: a fix that works but is undocumented,
+unrepeatable, or Roosevelt-incompatible is a failure.
+
+## The gated execution model
+
+Every command block is labeled so a command line is never mistaken for prose,
+and so mutation risk is explicit (same convention as the repo runbooks):
+
+- `RUN -- <loc>` - the block CHANGES state; run it at <loc> (jumphost, a unit
+  via `juju ssh`, the mgmt VM...).
+- `CHECK (read-only) -- <loc>` - safe to re-run.
+- `GATE:` - hard stop. Do not proceed unless the stated condition holds.
+- `Expect:` - what a passing result looks like. Always state it: the operator
+  should never have to guess whether output is good.
+- `> CAUTION:` - destructive, secret-handling, or irreversible.
+
+Sequence discipline: read-only audit -> present the mutation -> operator
+approves/runs -> verify the result -> next step. One gated mutation at a time;
+never batch destructive steps. If output comes back unexpected, STOP and
+re-derive from the live state - do not improvise a correction inline.
+
+**Chat (no shell):** you prepare blocks, the operator runs and pastes output.
+Treat pasted output as the only evidence; a block you wrote but saw no output
+for did not happen.
+
+**Live shell:** you may execute `CHECK (read-only)` blocks yourself. `RUN` and
+`> CAUTION:` blocks still get presented and human-approved first - state what
+will change and why it is the minimal correct action.
+
+## Irreversible / one-shot / secret steps
+
+- Start every consequential session with `bash scripts/run-logged.sh <label>`
+  (opens a script(1)-logged shell to ~/as-executed/; index the session in
+  logs/as-executed-index.md -- content NEVER commits, the index always does).
+- Retrieve the exact prior working command from the as-executed log or runbook
+  VERBATIM. Never improvise vault init, unseal, authorize-charm, CA issuance,
+  or anything one-shot. (DOCFIX-006: a mis-redirected `vault operator init`
+  loses the unseal keys forever.)
+- Secrets never transit argv, clipboard, scrollback, or /tmp. Capture straight
+  to a 0600 file under $HOME with `umask 077`; unseal via hidden prompt (L4);
+  transfer via base64 pipe into a root-written 0600 file (L-P6-4). Never echo
+  a secret to verify it - verify by length/format from the file.
+- Never run `maas list` (prints the API key - DOCFIX-016). Never trust a juju
+  action's human-formatted output for a captured secret or cert - pull from
+  `--format json` (indented YAML block-scalars corrupt PEMs; DOCFIX-021).
+- Authorize charms with short-lived child tokens: `juju run` persists action
+  params in the operation log (DOCFIX-011).
+
+## Record-keeping: D-NNN / DOCFIX-NNN / BUNDLEFIX-NNN
+
+- `D-NNN` - design decisions (`docs/design-decisions.md`). Append-only:
+  superseded entries stay in place, marked, with the superseding entry
+  appended. `DOCFIX-NNN` - runbook fixes. `BUNDLEFIX-NNN` - bundle fixes.
+- ALWAYS grep the repo for the next free number before assigning one -
+  collisions have happened. State "next-free verified" when you assign.
+- Mid-task findings are logged as proposals, not acted on (hard rule 1).
+  A finding that changes a runbook becomes a DOCFIX; one that changes
+  architecture becomes a D-NNN with status PROPOSED until the operator rules.
+- Status vocabulary: PROPOSED/OPEN -> ADOPTED/DECIDED -> SUPERSEDED (by
+  D-MMM). Do not implement PROPOSED items.
+- Before acting on any change request, grep design-decisions for the
+  governing decision on that surface (and its dependents - e.g. a runbook
+  step may rely on the current state). Finding a PROPOSED/OPEN decision
+  means presenting its recorded options for a ruling, not picking one.
+
+## Delivery rules (files handed to the operator)
+
+- Multi-file changes ship as repo-relative ZIPs, never loose files (the
+  Windows/GitHub Desktop loose-file workflow misplaces them).
+- Everything committed is ASCII-only, LF-only. Validate before delivery:
+  non-ASCII with `grep -nP '[^\x00-\x7F]'` (or a Python byte read); CR bytes
+  with a Python `data.count(b"\x0d")` - a `grep $'\r'` false-positives on
+  `$r...` tokens. Non-ASCII in OpenStack config has caused silent daemon
+  failures (mod_wsgi UnicodeDecodeError).
+- On-box script delivery over `juju ssh` goes as a base64 pipe decoded to a
+  file, then the FILE is executed - never a raw heredoc (paste-mangling and
+  stdin-consumption both bite; see script-authoring on `read` vs pipes).
+- Windows-side steps are PowerShell-native (no bash heredocs, no backslash
+  continuations). The operator commits from Windows; the jumphost only pulls.
+
+- Credential exposures and security obligations get a ROW in
+  docs/security-ledger.md at discovery time (owner + status) - never only a
+  script-comment note; the ledger is reviewed at every phase-00 and handoff.
+- Under operator-granted blanket approval, the delivery contract is: implement,
+  then hand over ONE cumulative repo-relative ZIP plus a changelog where every
+  item states what changed, the evidence for why, and a per-item revert. The
+  changelog is the review surface; opinion-weighted calls are flagged as such.
+
+## Debate and correction norms
+
+- Challenge weak reasoning with sources; the operator prefers industry best
+  practice over the quick fix and will push back hard on unverified claims.
+  When challenged, verify (docs, source, live probe) rather than defend.
+- Own mistakes plainly, in one or two sentences, then fix. No self-flagellation.
+- When a prior decision looks wrong, propose superseding it through the D-NNN
+  process - do not quietly deviate from it.
+
+## Troubleshooting entry discipline
+
+Before any server-side hypothesis for "X used to work, now it doesn't" on a
+web UI: eliminate client state first (incognito window, ~10 seconds), then a
+server-side curl, THEN hypothesize. Before any command that acts on tenant
+resources: confirm which project/domain scope the shell holds
+(`openstack token issue`-level certainty) - `server create` and friends use
+ambient scope silently. Full triage method: references/troubleshooting.md.
diff --git a/.claude/skills/openstack-cloud-ops/references/script-authoring.md b/.claude/skills/openstack-cloud-ops/references/script-authoring.md
new file mode 100644
index 0000000..1bf6aea
--- /dev/null
+++ b/.claude/skills/openstack-cloud-ops/references/script-authoring.md
@@ -0,0 +1,183 @@
+# Script authoring - house style and hardening
+
+Every script and paste block in this project follows these rules. They are
+not style preferences: each one encodes a failure that actually happened.
+Read this in full before writing ANY bash or python for this cloud.
+
+## Zeroth decision: does it already exist?
+
+Before writing ANY operational script or check block, search the repo:
+`grep -rli <topic> scripts/ runbooks/` - the deploy/verify surface is heavily
+scripted and duplicating an existing script creates drift (e.g. the haproxy
+backend sweep already exists as `scripts/phase-03-core-verify.sh` 3.1b).
+Route to, extend, or fix the existing artifact; write new only when nothing
+covers the need, and say which search came up empty.
+
+## First decision: what kind of block is this?
+
+The error-handling regime depends on execution mode. Choosing wrong is itself
+a bug:
+
+1. **Executed script** (`bash script.sh`, own process): `set -uo pipefail`
+   at minimum; `set -e` acceptable and usually right, with the capture
+   caveats below. Exit codes are the interface.
+2. **Interactive paste block** (operator pastes into their shell): NEVER a
+   bare `exit` (it kills their shell) and no `set -e` (it stays set in their
+   shell, so the next failing command exits it - possibly mid-paste). Wrap
+   the whole block in a subshell `( { ...; } )` so a stray exit is contained;
+   signal failure by printing `FAIL: ...` lines the operator can read.
+3. **Verify/count-gate block** (greps that legitimately return zero matches):
+   run WITHOUT `set -e`, and end every count-grep with `|| true` - a zero
+   count is a valid answer, not an error (L1). `bash -n` cannot catch this;
+   it is behavior, not syntax.
+
+## The header contract (executed scripts)
+
+Every script opens with a comment block stating: path + argument synopsis;
+what it does (one paragraph, referencing the D-NNN/DOCFIX it implements);
+**whether it mutates anything** ("Mutates NOTHING" / what it changes and the
+gate protecting it); usage line; exit-code contract; and "ASCII + LF".
+House exit codes: `0` PROCEED, `1` HOLD (a gate failed), `2` precondition
+missing (tool absent, wrong model, helper not found). Then:
+
+    set -euo pipefail            # or set -uo pipefail; see regime above
+    shopt -s inherit_errexit 2>/dev/null || true
+    IFS=$'\n\t'
+
+Source shared constants instead of restating them:
+`. "$SCRIPT_DIR/lib-net.sh"` (planes, VIP bands, helpers) and `lib-hosts.sh`
+(hostnames, octets, system_id resolution). If a value you need is not in a
+lib, consider adding it there rather than inlining a literal.
+
+## Hardening rules (each one is a scar)
+
+**SIGPIPE races break guards in BOTH directions.** `cmd | grep -q X` under
+pipefail: on match, grep exits, the producer takes SIGPIPE (141), the pipeline
+reports failure despite the match. In an `... || die` verify this FALSE-DIES on
+success; in an `... && die` guard it FAILS OPEN -- the 2026-07 sweep found a
+duplicate-CIDR guard that let collisions through exactly this way. Treat every
+`| grep -q` on a live pipe as a defect regardless of which way the test points.
+
+**Pipefail + SIGPIPE race: the fix.** `cmd | grep -q X` under pipefail reports
+failure even on a match: `grep -q` closes the pipe on first match and SIGPIPE
+(141) kills the producer (`juju ssh` especially). Capture, then test:
+
+    OUT=$(cmd 2>&1 || true)
+    grep -q "pattern" <<<"$OUT"
+
+**`set -e` kills id-captures silently.** `ID=$(openstack ... || die ...)` -
+if the subshell exits non-zero before your handler fires, `set -e` aborts the
+assignment line with no message. Append `|| true` to each capture, then
+validate the captured value explicitly (see whole-output validation below).
+
+**Whole-output validation, never extract-then-check.** Do not pipe raw output
+through `awk`/`grep` to extract a field and then test the fragment - a
+partial failure yields a plausible-looking fragment. Capture the WHOLE output,
+validate its shape (e.g. `is_id(){ [[ "$1" =~ ^[0-9a-f]{32}$ ]]; }` for a
+keystone id), and only then use it.
+
+**Centralize `</dev/null` in a wrapper, not per call.** The house pattern is a
+2-line helper (`rc()`, `rcap()`, `J()`) that appends `</dev/null 2>&1` once;
+every call site then stays clean and un-forgettable. Heredoc-payload ssh
+(`ssh ... bash -s <<'EOF'`) is the ONE exemption -- stdin IS the delivery there.
+
+**Inner stdin consumption.** Any `ssh`/`sudo`/`juju ssh` inside a heredoc,
+pipe, or loop eats the remaining stdin and truncates the block. EVERY inner
+invocation gets `</dev/null` - via the wrapper helper where one exists,
+explicitly otherwise (`</dev/tty` only when it genuinely must prompt).
+
+**`read` and `... | bash` are mutually exclusive.** A script piped to bash
+has the pipe as stdin, so `read` returns empty at EOF - this once silently
+created a passwordless user. For paste-safety AND working prompts: base64-
+decode to a file, then run the file (it inherits the terminal stdin).
+
+**juju invocation shape.** `juju ssh -m <model>` - the `-m` flag goes BEFORE
+the target. `juju run <unit> <action>` output: use `--format json` for
+anything captured; confirm long actions via `juju show-operation <N>`, not
+the streamed log (a wait-timeout does not mean the hook failed).
+
+**Snap confinement.** The openstack CLI snap cannot read `/tmp` - stage files
+under `$HOME`. Same for `juju attach-resource` payloads.
+
+**Client output ordering.** `openstack -f value -c X -c Y` returns columns in
+ALPHABETICAL order, not flag order. When order matters: `-f json | jq`, or
+single-column queries. After any jq returning null, run `jq 'keys'` - key
+casing is command-specific (Title-Case in lists, hyphenated in quota show).
+
+**Environment isolation.** Any block that switches identity runs in a
+subshell that first unsets all OS_* vars:
+`( for v in $(env | awk -F= '/^OS_/{print $1}'); do unset "$v"; done; export ... )`
+Thread `OS_CACERT` explicitly into isolated subshells - it gets stripped.
+Confirm scope before acting: which project/domain the token holds, not which
+you intended.
+
+**Stable keys, not drifting IDs.** Look up MAAS subnets by CIDR, machines by
+hostname, CAPI CRs by LISTING then operating on the exact returned name
+(the OpenStackCluster suffix is random per create - a wrong-name patch
+silently no-ops). system_ids are re-minted on re-enrollment.
+
+**Verify the launched cmdline, not the config text.** For OpenStack debs run
+via LSB-init-wrapped systemd: a flag "present in a file" proves nothing.
+Gate on behavior - the init script's `show-args`, `ps -ww -C <daemon> -o args`
+on the live process, and probe under the daemon's RESTRICTED PATH
+(`env -i PATH=/usr/sbin:/usr/bin:/sbin:/bin sh -c 'command -v helm'`) - an
+interactive shell's PATH masks daemon-PATH failures.
+
+**sed is not a verifier.** A non-matching `sed -i` exits 0 having changed
+nothing. Assert the post-edit content; never trust sed's exit code as proof
+of the edit.
+
+**ASCII + LF, validated.** Non-ASCII: `grep -nP '[^\x00-\x7F]'` or a Python
+byte read. CR check: Python `data.count(b"\x0d")` - `grep $'\r'`
+false-positives on `$r...` tokens. Non-ASCII in conf.d has silently killed daemons.
+
+**Python helpers live in .py files** tested against fixtures - no inline
+python-in-bash beyond one-liners. Do not assume `jq` exists off-jumphost;
+gate on it (`need_jq`) or use python3.
+
+**Secrets in scripts:** whitelist-write to 0600 files under a 0700 dir
+(`umask 077` first), never echo, never argv, measure secret length from the
+file rather than asserting an expected length (this deployment's app-cred
+secrets are 86 chars, not the commonly assumed 43 - never hardcode either).
+
+## Testing: nothing ships on `bash -n`
+
+`bash -n` validates parse, not behavior - and most of the failures above are
+behavioral. Every nontrivial script gets an offline regression harness at
+`tests/<script-name>/run-tests.sh` in the established pattern:
+
+- `fakebin/` contains fake `juju` / `openstack` / `maas` / `kubectl` /
+  `ssh` executables that replay fixture output and log the calls they
+  receive; real coreutils stay real.
+- The harness sets `PATH="$BIN:$PATH"`, a scratch `HOME`, and env-injected
+  fixture paths, runs the target, and asserts BOTH the exit code and an
+  output regex per case: `run <want_rc> <regex> <label>`.
+- Cover: the happy path, each gate's failure branch (asserting HOLD not
+  crash), each precondition (exit 2), and any DOCFIX behavior the script
+  claims (e.g. "verify-first never creates a rule when one exists").
+- Read an existing harness (e.g. `tests/phase-07-conductor-graft/`) before
+  writing a new one; match its shape.
+
+For MUTATING scripts, the harness fakebin is STATEFUL: the fake `maas`/`juju`
+advances a phase file on each mutation call so post-mutation verification reads
+post-mutation reality (see tests/phase-00-teardown-d061 -- canary survival,
+decompose-detection, substrate-collision aborts). A `--no-prompt` flag on a
+destructive script exists FOR its harness, nothing else.
+
+**A script migration commit MUST carry its harness.** The D-060 revert updated
+the scripts and left their harnesses testing the retired D-058 world --
+red-at-HEAD tests that trained everyone to ignore red. If you change a script's
+behavior or vocabulary, the same commit updates fixtures and expectations.
+
+**Static contract:** `bash scripts/repo-lint.sh` must exit 0 before anything is
+delivered (L1 ASCII/LF, L2 stale tokens, L3 ghost refs, L4 deprecated refs,
+L5 numbering, L6 bare invocations). A guard script that must NAME stale tokens
+opts out per-file with a `repo-lint: allow-stale-tokens` marker in its header.
+Committed binaries (e.g. policies/overrides.zip) need a `*.zip binary`
+.gitattributes rule or the LF normalization corrupts them in transit.
+Introducing a tool dependency (jq, python module) means adding its presence
+gate in the same change.
+
+Read-only verify scripts DETECT and report; remediation stays a gated human
+step even when the fix is obvious - the script's job is evidence, the
+operator's job is the mutation decision.
diff --git a/.claude/skills/openstack-cloud-ops/references/troubleshooting.md b/.claude/skills/openstack-cloud-ops/references/troubleshooting.md
new file mode 100644
index 0000000..ef2567c
--- /dev/null
+++ b/.claude/skills/openstack-cloud-ops/references/troubleshooting.md
@@ -0,0 +1,88 @@
+# Troubleshooting - method, reflexes, and routing
+
+The repo's `runbooks/appendix-A-troubleshooting.md` is the operational
+symptom->cause->fix index, keyed by D-NNN/DOCFIX-NNN (it now includes the
+mysql-innodb-cluster recovery signatures and a point-of-use identifier index).
+ALWAYS check it before diagnosing from first principles - most failures on
+this cloud have been seen before and have a validated fix.
+`ops-capi-recovery.md` covers the CAPI/Magnum stack;
+`runbooks/ops-restart-procedure.md` covers full-cloud recovery.
+This file gives you the method and the reflexes that must fire BEFORE and
+WHILE you read those.
+
+## Triage method
+
+1. **Reproduce and capture verbatim.** The exact error text routes you in
+   appendix-A. Paraphrased symptoms match nothing.
+2. **Eliminate cheap layers first.** Web UI misbehaving: incognito/client
+   state (~10s) before any server hypothesis. CLI misbehaving: confirm the
+   token scope (project/domain) before blaming the service - wrong ambient
+   scope produces misleading 404s/403s, especially with domain-per-client
+   identity (an un-domain-qualified lookup resolves in the WRONG domain).
+3. **Establish what is actually true, live.** `bash scripts/cloud-assert.sh`
+   first - it runs every service-own-verdict gate in one read-only sweep and
+   localizes the failing layer. Then the targeted read-only audit of that
+   surface before touching anything: juju status, the service's own state
+   (haproxy admin socket, mysql cluster-status, vault status, ovn-appctl),
+   the relevant logs. Hard rule 2: measurements, not memory.
+4. **Match against appendix-A**, then design-decisions if it smells
+   architectural. If matched: follow the recorded fix exactly
+   (verbatim-reference rule for anything one-shot).
+5. **If unmatched:** smallest-possible hypothesis, read-only test to confirm
+   or kill it, and log the finding. New root causes become appendix-A /
+   DOCFIX material - capturing them is part of the fix.
+6. **Least-destructive first, individually gated.** Reload before restart,
+   restart before rebuild, rebuild before redeploy. Never batch.
+
+## Reflexes (internalize these; they override intuition)
+
+- **juju-green is necessary, not sufficient.** `active/idle` is BLIND to:
+  a DOWN haproxy backend (D-045 - hid a dead nova-api for 3 days), a missing
+  magnum trustee domain (D-046 - magnum reports ready regardless), an
+  unparsed-but-attached policy override, and per-backend/service state
+  generally. Gate on the service's OWN verdict (admin sockets, functional
+  probes), never on juju status alone.
+- **"Reports OK while broken" generalizes.** Charm-side validation is often
+  parse-level only (the keystone policy zip validates YAML, not semantics).
+  Acceptance is BEHAVIORAL: prove the capability works, not that the config
+  is present.
+- **Distinguish down from thrashing.** A host that looks "down" to juju/OVN
+  (ovsdb inactivity-probe storms) may be swap-thrashing, not down (D-040).
+  Check `who -b` / `uptime` / `journalctl -k | grep -i oom` before treating
+  it as an outage.
+- **Read the reason field before the status field.** Magnum health UNHEALTHY
+  has three signatures (D-042 amendment): reason EMPTY = conductor cannot
+  reach the mgmt API (VM down?); all-Ready except infrastructure "not found"
+  = the cosmetic driver-contract miss; a reason citing LB failure = real,
+  check Octavia. Same status, three different responses.
+- **Single-node mgmt VM does not self-heal (D-035/D-041).** Workload nodes
+  wedged with the `uninitialized` taint, magnum reconcile dead, addons
+  Pending -> check `capi-mgmt-v2` is ACTIVE before anything else. Manual
+  start is POLICY (down is a signal), not a defect.
+- **Known blast-radius traps:** `juju destroy-model` decomposes MAAS
+  pod-composed machines - machine retention is `juju remove-machine
+  --keep-instance` (D-061); mysql-innodb-cluster never bootstraps at
+  num_units 1 - deploy at 3 (D-062); `reboot-cluster-from-complete-outage`
+  is destructive against an already-healthy cluster - check cluster status
+  FIRST; vault restarts sealed by design - sealed-after-reboot is expected,
+  not a fault; the magnum `domain-setup` action must be re-run after every
+  redeploy (D-046).
+- **Green-in-the-shell, broken-in-the-daemon.** Interactive shells have a
+  different PATH, env, and stdin than LSB-init daemons. Verify under the
+  daemon's conditions (restricted PATH, live-process args) - see
+  script-authoring.
+- **The absence of an expected resource is a finding, not a gap to fill.**
+  If something that should exist does not (a domain, a rule, an image
+  property), find out WHY it is absent before recreating it - it may have
+  been removed deliberately (check design-decisions) or its absence may be
+  the actual root cause several layers up.
+
+## Escalation and blockers
+
+- Missing vault unseal keys, lost one-shot secrets, or anything requiring
+  material only the operator holds: STOP and escalate. Do not improvise
+  around a recovery blocker (a vault re-init wipes the TLS plane).
+- If a fix would deviate from a runbook or contradict a D-NNN: log it as a
+  proposal and get a ruling. Mid-incident is exactly when discipline pays.
+- Two failed fix attempts on the same hypothesis = the hypothesis is wrong.
+  Step back to the audit stage; widen what you are measuring.
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..df15ae5
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,53 @@
+# CLAUDE.md -- Omega Cloud jumphost session contract
+
+This working directory is the LIVE operations clone on the jumphost
+(vopenstack-jesse). Commands here reach the real cloud: the juju controller,
+MAAS, and the OpenStack APIs. This file is always in context; the full
+operating skill loads on demand from `.claude/skills/openstack-cloud-ops/`
+(read its SKILL.md before any nontrivial task -- it carries the routing table,
+the standard loops, and the script-authoring/troubleshooting discipline).
+
+## Hard rules (non-negotiable; a live shell relaxes transport, never discipline)
+
+1. Execute only the current runbook step, exactly as written. No added scope,
+   no adjacent improvements mid-step. Findings are LOGGED (changelog / D-NNN
+   proposal), never executed live mid-step.
+2. Never use an inferred value. No IP, ID, name, or scope enters a command
+   unless measured this session or carried from confirmed as-built. Would-be
+   inferences STOP and get measured.
+3. Verify before mutate. Read-only audit first; every mutation is presented,
+   justified as the minimal correct action, and individually human-approved
+   (the permission `ask` rules enforce this -- do not work around them).
+   Destructive steps are never batched.
+
+## Session bootstrap (run at the start of every session)
+
+    git pull
+    bash scripts/repo-lint.sh          # expect 0 fail (1 legacy WARN is documented)
+    bash scripts/run-tests-all.sh      # after any tooling change; ALL GREEN expected
+
+For any session that will mutate the cloud, the operator starts logging first:
+`bash scripts/run-logged.sh <label>` (see docs/as-executed-log-convention.md).
+
+## Secrets (enforced by permission rules + the PreToolUse guard; also a norm)
+
+Never read key or credential material into context: `~/vault-init/`,
+`~/as-executed/`, `~/tenant-*/`, any `*-cred.txt` / `*appcred*` file. Verify
+secrets by length/format from a script, never by printing. One-shot vault
+operations (init/rekey/generate-root) are operator-only, verbatim from the
+runbook -- the guard hook hard-blocks them.
+
+## Where things are
+
+- Deploy gate: `bash scripts/preflight.sh` (the ONLY pre-deploy entry).
+- Cloud health: `bash scripts/cloud-assert.sh` (behavioral; `--capture` = BOM).
+- Incidents: `runbooks/appendix-A-troubleshooting.md` by verbatim symptom.
+- Restart: `runbooks/ops-restart-procedure.md`.
+- Decisions: `docs/design-decisions.md` -- grep for the governing D-NNN before
+  ANY change to a built surface; PROPOSED means present options, never pick.
+- Numbering: grep for next-free before assigning any D-/DOCFIX-/BUNDLEFIX-NNN.
+- Delivery: every script change ships with its `tests/<name>/run-tests.sh`
+  harness green, `repo-lint` clean, and a changelog entry with a revert.
+
+Repo content is authoritative over anything remembered from prior sessions;
+when they diverge, the repo wins and the divergence gets flagged.
diff --git a/docs/changelog-20260703-process-hardening.md b/docs/changelog-20260703-process-hardening.md
index 4ea61d2..9eaba71 100644
--- a/docs/changelog-20260703-process-hardening.md
+++ b/docs/changelog-20260703-process-hardening.md
@@ -312,3 +312,46 @@
 by design). Exit-code audit: clean (tenant-acceptance's 11-14 are a declared
 per-phase contract). deploy-watch, maas-fabric-prune, juju-spaces-check,
 osd-blank-check: read; disciplined; no findings.
+
+================================================================================
+## Block 5 -- Claude Code on the jumphost (guardrails + session contract)
+================================================================================
+Context: Claude Code now runs on the jumphost with this repo as the project
+dir -- a live shell with real cloud credentials. Mechanics verified against
+the Claude Code permissions docs: rules evaluate deny -> ask -> allow; deny is
+absolute in every mode; a PreToolUse hook exit-2 blocks BEFORE permission
+evaluation; Read() deny rules do not stop arbitrary subprocess file reads.
+
+### 28. NEW -- CLAUDE.md (always-loaded session contract)
+The three hard rules, session bootstrap, secrets norms, and routing pointers.
+Deliberately short: always-in-context essentials only; depth lives in the skill.
+
+### 29. NEW -- .claude/settings.json (committed, team-shared permission rules)
+allow = the CHECK surface (read-only juju/maas/openstack, the gate scripts,
+harnesses). ask = the ENTIRE mutation surface (juju/maas/openstack mutations,
+juju ssh/run/exec, anything --apply, sudo, virsh, git commit/push) -- this IS
+the gated-mutation model, enforced by the tool rather than by convention.
+deny = the never class: vault one-shots, destroy-controller, maas list
+(DOCFIX-016), force-push, and Read/Edit on ~/vault-init, ~/as-executed,
+~/tenant-*, *-cred.txt, *appcred*. Design choice (debate): destructive ops are
+ASK not DENY -- they legitimately run under runbooks with human approval; deny
+is reserved for operations NO session should ever perform.
+
+### 30. NEW -- .claude/hooks/guard-destructive.py + tests/claude-guard/ (13 cases)
+PreToolUse belt-and-suspenders: hard-blocks (exit 2, all modes, even bypass)
+the never class plus secret-file SHELL reads that Read() rules cannot see.
+Exists because hook blocks precede permission evaluation and upstream Bash
+rule enforcement has a documented reliability history. Fail-open contract on
+malformed input is deliberate (permission rules still apply) and tested.
+
+### 31. Skill relocated to .claude/skills/openstack-cloud-ops/
+Claude Code auto-discovers project skills there; skills/ top-level now carries
+only the packaged .skill artifact (for claude.ai upload). Single source: edit
+under .claude/skills/, repackage the .skill from it, reinstall in claude.ai.
+MANUAL GIT ACTION: none, even if the move shows as delete+add (git mv staged).
+
+### Not done, deliberately (revisit triggers noted)
+- OS sandboxing (bubblewrap): additive value once Claude Code usage matures;
+  the permission+hook layer is the v1 boundary. Revisit at Roosevelt.
+- disableBypassPermissionsMode: managed-settings scope (system paths, admin) --
+  worth doing when a second operator joins the jumphost.
diff --git a/tests/claude-guard/run-tests.sh b/tests/claude-guard/run-tests.sh
new file mode 100644
index 0000000..31f8bcf
--- /dev/null
+++ b/tests/claude-guard/run-tests.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# tests/claude-guard/run-tests.sh -- offline harness for the Claude Code
+# PreToolUse guard (.claude/hooks/guard-destructive.py). Proves each NEVER
+# rule blocks (exit 2) and normal ops pass (exit 0), including the malformed-
+# input fail-open-to-permission-rules contract. ASCII + LF.
+set -uo pipefail  # no -e: a case with the wrong rc is tallied by run(), not fatal
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"  # absolute dir of this harness
+HOOK="$(cd "$HERE/../.." && pwd)/.claude/hooks/guard-destructive.py"  # hook under test, two levels above HERE
+PASS=0; FAIL=0  # case tallies, incremented by run()
+run() { # run <want_rc> <label> <json-or-raw>
+  local rc
+  printf '%s' "$3" | python3 "$HOOK" >/dev/null 2>&1; rc=$?
+  if [ "$rc" = "$1" ]; then echo "  PASS  $2"; PASS=$((PASS+1))
+  else echo "  FAIL  $2 (rc=$rc want=$1)"; FAIL=$((FAIL+1)); fi
+}
+cmd() { printf '{"tool_input":{"command":"%s"}}' "$1"; }
+
+run 2 "vault operator init blocked"            "$(cmd 'vault operator init > /tmp/x 2>&1')"  # block cases: want rc 2 (hard block)
+run 2 "vault operator rekey blocked"           "$(cmd 'VAULT_ADDR=x vault operator rekey')"
+run 2 "destroy-controller blocked"             "$(cmd 'juju destroy-controller maas-controller')"
+run 2 "maas list blocked (DOCFIX-016)"         "$(cmd 'maas list')"
+run 2 "force-push blocked"                     "$(cmd 'git push --force origin main')"
+run 2 "secret read blocked (vault-init)"       "$(cmd 'cat ~/vault-init/unseal-keys.txt')"
+run 2 "secret read blocked (appcred)"          "$(cmd 'head -1 /home/j/tenant-acme/acme-svc-appcred.txt')"
+run 2 "catastrophic rm blocked"                "$(cmd 'rm -rf /')"
+run 0 "juju status passes"                     "$(cmd 'juju status -m openstack --format=json')"  # allow cases: want rc 0 (hook has no opinion)
+run 0 "cloud-assert passes"                    "$(cmd 'bash scripts/cloud-assert.sh')"
+run 0 "vault status (non-one-shot) passes"     "$(cmd 'juju ssh vault/0 -- vault status')"  # must not trip the vault one-shot rule
+run 0 "maas admin read passes (not maas list)" "$(cmd 'maas admin machines read')"
+run 0 "malformed input -> no opinion (rc 0)"   'not-json-at-all'  # raw stdin, bypasses cmd(): the fail-open contract
+
+echo; echo "RESULT: PASS=$PASS FAIL=$FAIL"
+[ "$FAIL" -eq 0 ] && { echo "ALL PASS"; exit 0; } || exit 1  # exit 1 when any case failed