diff --git a/.gitignore b/.gitignore
index e699e05..bc74216 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,13 @@
 passphrase.txt
 # Bundle backups (timestamped)
 bundle.yaml.bak-*
+
+# --- repo-sanitation sweep additions ---
+/remove/
+*.kubeconfig
+kubeconfig
+vault-init*.txt
+init.txt
+*.bak
+*.tmp
+.DS_Store
diff --git a/README.md b/README.md
index ec91812..465cb4e 100644
--- a/README.md
+++ b/README.md
@@ -1,120 +1,111 @@
-# openstack-caracal-ipv4 — VR0 DC0 Omega Cloud (v1)
+# openstack-caracal-ipv4 -- VR0 DC0 Omega Cloud (v1)
 
-**Scope:** Charmed OpenStack Caracal (2024.1) IPv4-only testcloud deployment
-on the 4-VM KVM lab, modeled in NetBox as **VR0 DC0 Omega Cloud**.
+**Scope:** Charmed OpenStack Caracal (2024.1), IPv4-only, on the 4-VM KVM lab and
+modeled in NetBox as **VR0 DC0 Omega Cloud**. This repository is the deployment
+method: bundle, overlay, gated runbook, and validation scripts together describe
+everything required to bring the cloud up from a clean MAAS-managed Juju model. It is
+a rehearsal for the future bare-metal **Roosevelt** deployment; design choices favour
+the transferable answer over the quick fix so the testcloud surfaces real production
+requirements.
 
-## v1 vs. v2 — read this first
+## v1 vs. v2 -- read this first
 
-This repository is the **v1 deliverable** — IPv4-only Caracal Charmed
-OpenStack on the existing MAAS-provisioned network layout. v1 ships first
-because the upstream router infrastructure is not yet IPv6-ready; deferring
-on IPv6 lets v1 prove the bundle, Option B binding fix, Magnum CAPI graft,
-Designate-from-day-one, and the hacluster relation pattern at testcloud scale
-without waiting on network-side IPv6 readiness.
-
-**v2** adds IPv6 / dual-stack per the address-family matrix retained as
-v2-scope decisions in `docs/design-decisions.md` (D-004, D-004a). v2 will
-ship either as a sibling overlay in this repository (`overlays/v2-dualstack.yaml`
-on a `v2` branch) or as a separate repository — TBD when v2 work begins.
-
-The IPv6 prefixes already imported into NetBox under VR0 DC0 remain in
-NetBox as **Reservation status** to document the v2 intent without
-implying they are active. See `netbox/ipv6-mark-reserved.py`.
-
-## Repository purpose
-
-This repository is the deployment method. Bundle, overlays, runbooks, and
-validation scripts together describe everything required to bring up the
-cloud from a clean MAAS-managed Juju model. Anyone with NetBox read access,
-MAAS access, and the Juju controller can clone this repository and reproduce
-the cloud.
+v1 is IPv4-only Caracal on the existing MAAS-provisioned network layout; it ships first
+because the upstream router infrastructure is not yet IPv6-ready. v2 adds IPv6 /
+dual-stack (decisions D-004 / D-004a, retained as v2-scope in
+`docs/design-decisions.md`) and will ship either as a sibling overlay on a `v2` branch
+or as a separate repository (TBD when v2 begins). IPv6 prefixes already imported into
+NetBox under VR0 DC0 remain at **Reservation** status to document v2 intent without
+implying they are active (`netbox/ipv6-mark-reserved.py`).
 
 ## Source of truth
 
-**NetBox is authoritative for IPAM.** Any IP, prefix, or VLAN value
-referenced in this repository traces back to NetBox. The exception is
-tenant per-project subnets, which under the v1 hybrid model (D-016) are
-Neutron-managed within a NetBox-modeled upstream tenant pool — i.e., the
-pool has NetBox standing, individual tenant subnets do not.
+**NetBox is authoritative for IPAM.** Every IP, prefix, and VLAN referenced in this
+repository traces back to NetBox. The exception is per-project tenant subnets, which
+under the v1 hybrid model (D-016) are Neutron-managed inside a NetBox-modeled upstream
+pool -- the pool has NetBox standing; individual tenant subnets do not.
 
 ## Repository layout
 
 ```
 openstack-caracal-ipv4/
-├── README.md                        # this file
-├── bundle.yaml                      # canonical Charmed OpenStack bundle (IPv4)
-├── overlays/
-│   └── vr0-dc0-testcloud.yaml       # 4-VM lab specifics; num_units=1 + hacluster
-├── runbooks/
-│   # (deprecated; see runbooks/deprecated/ - superseded by D-017 + D-018 + v1-do-doc-NN set)
-│   ├── 01-destroy-model.md          # destroy openstack model + verify
-│   ├── 02-deploy.md                 # juju deploy + settle wait
-│   ├── 03-vault-init.md             # vault unseal + cert auth
-│   ├── 04-magnum-domain.md          # domain-setup action + keystone wiring
-│   ├── 04a-capi-bootstrap-cluster.md # capi-mgmt VM deploy + k3s + CAPI + ORC (D-017)
-│   ├── 05-magnum-capi-driver.md     # pip install driver + kubeconfig + systemd
-│   ├── 06-tenant-setup.md           # project, user, openrc, app credentials
-│   ├── 07-dns-zones.md              # Designate zones + API VIP A records (v1)
-│   └── 08-validate.md               # Roosevelt-rehearsal validation criteria
-├── scripts/
-│   ├── pre-flight-checks.sh         # pre-deploy sanity checks
-│   └── validate.sh                  # end-to-end validation runner
-├── netbox/
-│   ├── README.md                    # what's here vs. what's deferred to v2
-│   ├── ipv4-prefixes-import.py      # adds IPv4 prefixes + IPv4 tenant pool
-│   └── ipv6-mark-reserved.py        # marks IPv6 entries as Reservation (Q3)
-└── docs/
-    ├── design-decisions.md          # architectural record (D-001 through D-019)
-    └── netbox-vip-queue.md          # post-deploy NetBox imports (workstream 2)
+|-- README.md                        this file
+|-- bundle.yaml                      canonical Charmed OpenStack bundle (IPv4); the
+|                                      testcloud num_units / VIPs / hacluster are baked in
+|-- overlays/                        empty in git -- the only overlay (octavia-pki.yaml)
+|                                      is generated at deploy (phase-01) and is
+|                                      secret-bearing, so it is never committed
+|-- runbooks/                        the gated deploy runbook (phase-NN) + appendices
+|   |-- README.md                    runbook index, order, and conventions
+|   |-- phase-00-teardown-maas-reset.md
+|   |-- phase-01-bundle-deploy.md
+|   |-- phase-02-vault-bringup.md
+|   |-- phase-03-core-verify.md
+|   |-- phase-04-network-carve.md
+|   |-- phase-05-octavia-enablement.md
+|   |-- phase-06-incloud-mgmt-cluster.md
+|   |-- phase-07-conductor-graft.md
+|   |-- phase-08-workload-cluster-acceptance.md
+|   |-- appendix-A-troubleshooting.md
+|   \-- appendix-B-asbuilt-version-lock.md
+|-- scripts/
+|   |-- pre-flight-checks.sh         pre-deploy sanity checks
+|   |-- validate.sh                  end-to-end validation runner
+|   \-- review-bundle.py             bundle lint / review
+|-- netbox/
+|   |-- README.md                    what is imported vs. deferred to v2
+|   |-- ipv4-prefixes-import.py      IPv4 prefixes + IPv4 tenant pool
+|   \-- ipv6-mark-reserved.py        marks IPv6 entries Reservation (v2 intent)
+\-- docs/
+    |-- design-decisions.md          architectural record (D-NNN)
+    |-- netbox-vip-queue.md          post-deploy NetBox imports
+    \-- v1-pre-deploy-fixes.md       completed pre-deploy repo-hardening change list (D-019 series)
 ```
 
-## v1 deployment order
+## Deploy order
 
-The deploy is executed via the `runbooks/v1-do-doc-NN-*.md` execution documents in numeric order:
+Run the `runbooks/phase-NN-*.md` documents in numeric order. Each phase ends in a hard
+gate (an explicit pass/fail check); do not begin the next phase until the current gate
+passes. The two appendices are reference, not steps. See `runbooks/README.md` for the
+per-phase summary and the RUN-location conventions.
 
-| Doc | Purpose |
-|---|---|
-| `v1-do-doc-01-prep.md` | Pre-flight state check (repo, openrc, MAAS state of 5 VMs) |
-| `v1-do-doc-02-pki.md` | Octavia PKI overlay generation |
-| `v1-do-doc-03-destroy.md` | Conditional model + MAAS teardown (clean state for rebuild) |
-| `v1-do-doc-04-deploy.md` | `juju deploy` + settle wait + on-disk PKI verification |
-| `v1-do-doc-05-vault-init.md` | Vault initialization + cert cascade + admin-openrc regeneration |
-| `v1-do-doc-06-magnum-domain.md` | Magnum Keystone domain setup |
-| `v1-do-doc-07-capi-bootstrap.md` | CAPI bootstrap cluster + workload pivot |
-| `v1-do-doc-08-magnum-driver.md` | Magnum CAPI Helm driver graft |
-| `v1-do-doc-09-tenant.md` | Tenant project/user/openrc + Snapshot 2 |
-| `v1-do-doc-10-validate.md` | D-011 acceptance criteria + Snapshot 3 |
+| Phase    | Purpose                                                          |
+| -------- | ---------------------------------------------------------------- |
+| phase-00 | Teardown + MAAS reset (clean state for rebuild)                  |
+| phase-01 | Bundle deploy (incl. Octavia PKI overlay generation) + settle    |
+| phase-02 | Vault bring-up (PKI root; cert cascade)                          |
+| phase-03 | Core verify (settle, admin-openrc regeneration, Horizon)         |
+| phase-04 | Network carve (provider external network + IPAM reference)       |
+| phase-05 | Octavia enablement                                               |
+| phase-06 | In-cloud CAPI management cluster (D-035)                         |
+| phase-07 | Magnum conductor graft (magnum-capi-helm driver; D-031/D-037/D-042) |
+| phase-08 | Workload-cluster acceptance (D-011)                              |
 
-NetBox imports are run separately (gated on external NetBox engineer review; see `netbox/README.md`).
+NetBox imports run separately, gated on external NetBox-engineer review
+(`netbox/README.md`).
 
-## v1-specific design decisions (summary; see docs/design-decisions.md for full record)
+## Key v1 scope (full record in docs/design-decisions.md)
 
-- **D-015 v1/v2 fork** — IPv4-only v1; IPv6/dual-stack v2 deferred
-- **D-016 IPv4 tenant pool hybrid model** — NetBox owns upstream `/16` pool;
-  Neutron owns per-project subnets within it
-- **D-003 Option B network architecture** — Provider `/22` carries both
-  ext_net FIPs (`10.12.4.10–.223`) and OpenStack public API VIPs
-  (`10.12.4.224–.254`) on the same L2 segment; fixes the tenant→API
-  unreachability that caused Magnum OCCM crashloop on Bobcat testcloud
-- **D-005 Ceph Squid** — matches Caracal default; rehearses Roosevelt
-- **D-006 Vault HA backend = etcd + easyrsa**
-- **D-007 Magnum from day one** — charm in bundle + CAPI Helm driver graft
-- **D-019 (supersedes D-008) DNS scope reduction for v1** — Designate deferred
-  to v2 alongside corporate DNS / NS-delegation work. Tenant subnets use public
-  DNS (`1.1.1.1` / `1.0.0.1`) directly via `--dns-nameserver`.
-  `*.cloud.neumatrix.local` FQDN tree remains internal-only, resolved via static
-  `/etc/hosts` on bootstrap-relevant hosts.
-- **D-009 Hacluster relations included at num_units=1** — decorative on
-  testcloud; documents the relation pattern for Roosevelt scale-up
-- **No OVN pinning on testcloud** — Roosevelt bare-metal will pin via `ovn-source`
+- **D-015 v1/v2 fork** -- IPv4-only v1; IPv6 / dual-stack deferred to v2.
+- **D-016 IPv4 tenant-pool hybrid** -- NetBox owns the upstream pool; Neutron owns
+  per-project subnets within it.
+- **D-019 (supersedes D-008) DNS scope reduction** -- Designate deferred to v2; tenant
+  subnets use public DNS (1.1.1.1 / 1.0.0.1) via `--dns-nameserver`; the internal
+  `*.cloud.neumatrix.local` tree is resolved by static `/etc/hosts` on
+  bootstrap-relevant hosts.
+- **D-020 dual provider+metal HA VIPs** -- API charms carry a VIP on both the provider
+  and metal spaces (front-loaded; exact values live in `bundle.yaml` / NetBox).
+- **D-035 in-cloud management cluster** -- the CAPI / Magnum management cluster is a
+  single-homed in-cloud tenant VM (no out-of-cloud node, no clusterctl pivot).
+- **D-031 / D-037 / D-042 Magnum KaaS** -- tenant self-service Kubernetes via Magnum + the
+  magnum-capi-helm driver + the azimuth capi-helm-charts engine; the driver pin must be
+  contract-coherent with the CAPI core (see Appendix B).
+- **D-009 hacluster at num_units=1** -- decorative on testcloud; documents the relation
+  pattern for Roosevelt scale-up.
 
-## v2-scope decisions (deferred — read but do not action in v1)
+v2-scope decisions (D-004 dual-stack/IPv6 matrix; D-004a host-management-to-metal) are
+recorded but NOT actioned in v1.
 
-- **D-004 Dual-stack/IPv6-only matrix** — applies in v2 only
-- **D-004a Host management → Metal (Option A)** — applies in v2 only;
-  v1 keeps openstack0-3 host management IPs on the storage fabric
-- **VLAN modeling in NetBox** (Q2) — the VR0 DC0-VLANs group remains with
-  only VID 240 (OS-Provider) imported during prior session work; remaining
-  VLAN entries are deferred to v2 when actual VLAN tagging is in play.
-  Currently MAAS uses untagged-per-fabric, so the additional VLAN entries
-  would be misleading documentation
+> `docs/design-decisions.md` is the authoritative decision record. If it lags the
+> bundle/runbook (for example the D-020 VIP scheme or the D-028..D-042 series),
+> update `docs/design-decisions.md` so it matches the as-built bundle and runbook.
diff --git a/bundle.yaml b/bundle.yaml
index b17ff09..bdeb77b 100644
--- a/bundle.yaml
+++ b/bundle.yaml
@@ -126,7 +126,8 @@
     num_units: 1                       # 3 on Roosevelt (D-009)
     to: [lxd:8]
     options:
-      vip: "10.12.4.10 10.12.8.10"     # B1 front-loaded VIP; IS the catalog endpoint (B5, no os-public-hostname)
+      vip: "10.12.4.50 10.12.8.50"     # B1 front-loaded VIP; IS the catalog endpoint (B5, no os-public-hostname)
+      use-policyd-override: true       # as-built reconcile 2026-06-09 (origin untraced -- Review-later)
     bindings: *api-bindings
     constraints: arch=amd64
 
@@ -145,7 +146,8 @@
     num_units: 1
     to: [lxd:11]
     options:
-      vip: "10.12.4.13 10.12.8.13"     # B1
+      vip: "10.12.4.53 10.12.8.53"     # B1
+      image-conversion: true           # as-built; image conversion enabled (raw on Ceph-backed glance)
     bindings:                          # api-bindings + ceph->storage (C2; glance is a Ceph client)
       "":            metal
       public:        provider
@@ -180,7 +182,7 @@
     options:
       console-access-protocol: novnc
       network-manager: Neutron
-      vip: "10.12.4.16 10.12.8.16"     # B1
+      vip: "10.12.4.56 10.12.8.56"     # B1
     bindings: *api-bindings
     constraints: arch=amd64
 
@@ -197,6 +199,7 @@
       migration-auth-type: ssh
       resume-guests-state-on-host-boot: true
       virt-type: qemu                  # Testcloud nested-KVM; Roosevelt will use 'kvm'
+      reserved-host-memory: 8192       # ENV(testcloud 16GiB hosts) D-040 OOM fix; value in MB (charm default 512) -- DO NOT drop
     bindings:                          # C2 ceph/ceph-access -> storage. OVN-on-data: neutron-plugin -> data
       "":            metal             #   puts 'data' in this principal's binding set so ovn-chassis' data
       ceph:          storage           #   binding is a valid SUBSET (subordinate subset rule). nova-compute is
@@ -215,7 +218,7 @@
     num_units: 1
     to: [lxd:11]
     options:
-      vip: "10.12.4.19 10.12.8.19"     # B1
+      vip: "10.12.4.59 10.12.8.59"     # B1
     bindings: *api-bindings
     constraints: arch=amd64
 
@@ -237,7 +240,7 @@
       enable-ml2-port-security: true
       flat-network-providers: physnet1
       neutron-security-groups: true
-      vip: "10.12.4.15 10.12.8.15"     # B1
+      vip: "10.12.4.55 10.12.8.55"     # B1
     bindings: *api-bindings
     constraints: arch=amd64
 
@@ -298,7 +301,7 @@
     options:
       block-device: None
       glance-api-version: 2
-      vip: "10.12.4.12 10.12.8.12"     # B1
+      vip: "10.12.4.52 10.12.8.52"     # B1
     bindings:                          # api-bindings + ceph -> storage. cinder's container needs a storage NIC
       "":            metal             #   for Ceph; binding the regular 'ceph' endpoint provisions it AND puts
       public:        provider          #   'storage' in cinder's binding set, so cinder-ceph's ceph->storage is a
@@ -360,7 +363,7 @@
     to: [lxd:8]
     options:
       source: *ceph-source
-      vip: "10.12.4.20 10.12.8.20"     # B1 -- radosgw HA un-deferred for Roosevelt fidelity (decorative HA on testcloud)
+      vip: "10.12.4.60 10.12.8.60"     # B1 -- radosgw HA un-deferred for Roosevelt fidelity (decorative HA on testcloud)
     bindings:                          # api-bindings + mon->storage (C2). radosgw IS externally-facing (S3/Swift API).
       "":            metal
       public:        provider
@@ -378,7 +381,7 @@
     to: [lxd:10]
     options:
       debug: "false"
-      vip: "10.12.4.18 10.12.8.18"     # B1 -- browse HTTPS by IP (B5); ALLOWED_HOSTS must permit the VIP IP (verify at deploy)
+      vip: "10.12.4.58 10.12.8.58"     # B1 -- browse HTTPS by IP (B5); ALLOWED_HOSTS must permit the VIP IP (verify at deploy)
     bindings: *api-bindings
     constraints: arch=amd64
 
@@ -409,7 +412,7 @@
       #   juju deploy ./bundle.yaml \
       #     --overlay overlays/vr0-dc0-testcloud.yaml \
       #     --overlay overlays/octavia-pki.yaml
-      vip: "10.12.4.17 10.12.8.17"     # B1
+      vip: "10.12.4.57 10.12.8.57"     # B1
     bindings:                          # api-bindings + ovsdb-cms -> data. octavia's CONTAINER needs a data NIC so
       "":            metal             #   ovn-chassis-octavia can geneve-encap on the overlay; ovsdb-cms is a
       public:        provider          #   REGULAR (octavia<->ovn-central) endpoint -- unused in the amphora-driver
@@ -446,7 +449,7 @@
     to: [lxd:11]
     options:
       openstack-origin: *openstack-origin
-      vip: "10.12.4.11 10.12.8.11"     # B1
+      vip: "10.12.4.51 10.12.8.51"     # B1
     bindings: *api-bindings
     constraints: arch=amd64
 
@@ -478,7 +481,7 @@
     options:
       openstack-origin: *openstack-origin
       region: RegionOne
-      vip: "10.12.4.14 10.12.8.14"     # B1
+      vip: "10.12.4.54 10.12.8.54"     # B1
     bindings: *api-bindings
     constraints: arch=amd64
 
diff --git a/docs/design-decisions.md b/docs/design-decisions.md
index 3e3aab3..995e473 100644
--- a/docs/design-decisions.md
+++ b/docs/design-decisions.md
@@ -32,7 +32,7 @@
 
 | Charm group                                                                                                             | Channel                    |
 | ----------------------------------------------------------------------------------------------------------------------- | -------------------------- |
-| OpenStack core (keystone, glance, nova-\*, neutron-api, cinder, placement, octavia, barbican, designate, magnum) | `2024.1/stable`            |
+| OpenStack core (keystone, glance, nova-\*, neutron-api, cinder, placement, octavia, barbican, designate, magnum, vault) | `2024.1/stable`            |
 | OVN (ovn-central, ovn-chassis, ovn-dedicated-chassis-octavia)                                                           | `24.03/stable`             |
 | Ceph (ceph-mon, ceph-osd, ceph-radosgw if used)                                                                         | `squid/stable` (see D-005) |
 | MySQL (mysql-innodb-cluster, mysql-router subordinates)                                                                 | `8.0/stable`               |
@@ -139,12 +139,12 @@
 
 - `juju run magnum/leader domain-setup --wait=10m`
 - pip install `magnum-capi-helm==1.1.0` from PyPI into the magnum charm venv with `--break-system-packages` (stackhpc/magnum-capi-helm fork archived Dec 2024; canonical project moved to `openstack/magnum-capi-helm` on opendev/PyPI; 1.1.0 is the last Caracal-cycle release. Upstream tests against Magnum 2023.1+, so backward-compatible through Caracal 2024.1.)
-- Deploy `/etc/magnum/kubeconfig` pointing at `capi-mgmt.maas` bootstrap k3s
+- Deploy `/etc/magnum/kubeconfig` pointing at the **workload cluster** (the post-pivot home of CAPI controllers per **runbook 04a §17** `clusterctl move`). Staged on jumphost at `$HOME/magnum-capi/capi-mgmt-cluster.kubeconfig` by runbook 04a §19, transferred to the magnum unit by runbook 05 §6. Bobcat had this pointing at bootstrap k3s because the pivot was never executed; workstream 3b (2026-05-22) made the pivot mandatory.
 - Systemd override replacing init.d ExecStart to load `--config-dir`
 - `/etc/magnum/magnum.conf.d/99-capi.conf` setting `enabled_drivers=k8s_capi_helm_v1` and `[capi_helm] kubeconfig_file=/etc/magnum/kubeconfig` (ASCII-only; non-ASCII characters in conf.d cause silent daemon failures)
 
 
-**CAPI mgmt plane:** `capi-mgmt.maas` bootstrap k3s. Per **D-017**, this cluster is rebuilt from scratch every deployment cycle — there is no preserved-across-rebuild artifact. The install procedure for the bootstrap cluster lives in `runbooks/04a-capi-bootstrap-cluster.md` and runs **before** this runbook. This pattern transfers to Roosevelt unchanged.
+**CAPI mgmt plane:** Post-pivot, the workload cluster IS the CAPI management plane (per **runbook 04a §17**, `clusterctl move` pivots cluster state from the `capi-mgmt.maas` bootstrap k3s into the workload cluster, which becomes self-managing). Per **D-017**, both the bootstrap k3s and the workload cluster are rebuilt from scratch every deployment cycle — there is no preserved-across-rebuild artifact. The bootstrap install + pivot procedure lives in `runbooks/04a-capi-bootstrap-cluster.md` and runs **before** this runbook. This pattern transfers to Roosevelt unchanged.
 
 **Superseded portions:** The "preserved across rebuild" stance in earlier drafts of this decision is **superseded by D-017**. See D-017 for rationale. The earlier `stackhpc/magnum-capi-helm` v0.13.0 driver pin is superseded by the `openstack/magnum-capi-helm` 1.1.0 pin above (driver source repo moved + archived).
 
@@ -153,9 +153,7 @@
 
 ## D-008: DNS architecture
 
-**Status:** Superseded by D-019 (2026-05-27). v2-scope. Original decision text preserved below for audit.
-
-**Decision (original; superseded):** Layered — static /etc/hosts for bootstrap + Designate (in bundle from day one) for tenant-level resolution.
+**Decision:** Layered — static /etc/hosts for bootstrap + Designate (in bundle from day one) for tenant-level resolution.
 
 **Naming convention:**
 
@@ -222,12 +220,11 @@
 5. End-to-end Magnum CAPI cluster creation succeeds, including OCCM not crash-looping
 6. Vault unseal + auto-unseal-after-reboot pattern verified
 7. KVM snapshot baseline taken (Phase 5)
+8. Designate zones populated and tenant VMs resolve API hostnames (v2 scope; dropped from the v1 acceptance bar per D-019)
 
 
 Validation script: `scripts/validate.sh` (TBD).
 
-**Amendment (2026-05-27):** Per D-019, the "Designate resolves" criterion (former item 8) is removed for v1. Designate is deferred to v2; tenant subnets resolve via public DNS. v2 will reinstate a DNS-resolution validation criterion calibrated to whatever DNS mechanism is in place (NS delegation from corporate DNS, or otherwise).
-
 ---
 
 
@@ -261,11 +258,7 @@
 
 **Decision:** Self-hosted GitBucket at `git.baldurkeep.com`.
 
-**Repo path:** `OpenStack/openstack-caracal-ipv4` (v1; IPv4-only).
-
-- Web: `https://git.baldurkeep.com/OpenStack/openstack-caracal-ipv4`
-- Clone: `https://git.baldurkeep.com/git/OpenStack/openstack-caracal-ipv4.git`
-- Moved from `jesse.austin/openstack-caracal-ipv4` to the `OpenStack` group on 2026-05-27. GitBucket does not redirect from the old path.
+**Repo path:** `jesse.austin/openstack-caracal-ipv4` (v1; IPv4-only).
 
 **v2 repository:** TBD when v2 work begins. Two viable paths: sibling repo `openstack-caracal-ipv6` or `openstack-caracal-dualstack`, OR `v2` branch in this repo with an `overlays/v2-dualstack.yaml`. The single-repo-with-branch approach preserves history of what changed v1→v2 together; the sibling-repo approach keeps v1 frozen as a reference once v2 is in motion.
 
@@ -372,46 +365,179 @@
 ---
 
 
-## D-019: DNS scope reduction for v1 — Designate deferred to v2
+## D-019: Cloud DNS (Designate) deferred to v2 / Roosevelt
 
-**Decision (2026-05-27):** Designate is removed from the v1 testcloud bundle and deferred to v2 alongside corporate DNS / NS delegation work. v1 tenant subnets resolve via public DNS (`1.1.1.1`, `1.0.0.1`) directly via the `--dns-nameserver` option at subnet-create time.
+**Decision:** v1 ships with NO cloud-internal DNS; Designate is not deployed. Public service endpoints use FQDNs (`os-public-hostname`) that resolve to the provider VIPs via external/corporate DNS; internal and admin endpoints stay IP-based on the metal VIPs. Tenant instances use upstream resolvers (1.1.1.1 / 1.0.0.1). The D-011 acceptance bar is amended to drop the cloud-DNS criterion, and the planned `v1-do-doc-10-dns` runbook is dropped.
 
-**Supersedes:** D-008 (DNS architecture).
+**Consequence (documented, not a blocker):** metal-only charm units that make catalog-based client calls pull the PUBLIC (FQDN) endpoint and cannot resolve or route it; mapping the FQDN to the metal VIP does not help either, because the internal-endpoint certs carry no FQDN SAN. This is the root of the `glance-simplestreams-sync` (gss) / retrofit amphora-pipeline constraint recorded in D-021. The proper fix (cloud-internal DNS + FQDN-valid certs, or charms consuming internal endpoints) is a Roosevelt item.
 
-**Amends:** D-011 (validation bar — removes "Designate resolves" criterion).
+**Status:** Decided (v1). Reconstructed into this doc from the deploy record (no standalone D-019 file).
 
-### Rationale
+**Related:** D-008 (DNS architecture), D-021 (amphora-pipeline consequence), D-011 (acceptance bar amended).
 
-Three findings from the 2026-05-27 testcloud topology investigation:
+---
 
-1. **Outside-in DNS** (corporate clients resolving `*.cloud.neumatrix.local`) is not needed for v1. Corporate access to the cloud already flows through the existing `openstack.baldurkeep.com → 10.17.4.20 → 10.12.x` HTTPS proxy chain (handled by the edge nginx at `10.17.8.7`), which does not depend on corporate-side resolution of cloud-internal FQDNs.
+## D-020: Dual provider + metal API VIPs on clustered charms
 
-2. **The edge nginx cannot route to `10.12.x` directly.** Inspection confirmed the edge has only `10.17.8.7/22` plus a tailscale interface; reaching `10.12.4.x` requires the libvirt-host NAT path. Adding DNS to the testcloud would require parallel UDP/53 NAT/proxy plumbing across three hosts (edge nginx, libvirt host, internal nginx) for a feature that has no v1 consumer.
+**Decision:** Every clustered OpenStack API application (keystone, glance, nova-cloud-controller, neutron-api, cinder, placement, barbican, octavia, openstack-dashboard, magnum, vault) is configured with BOTH a provider VIP and a metal VIP, as a space-separated pair: `vip: "10.12.4.X 10.12.8.X"` (Option B).
 
-3. **Inside-out DNS** (tenant VMs resolving external names) is satisfied by tenant subnets pointing `--dns-nameserver` at public DNS (`1.1.1.1`, `1.0.0.1`). Designate is not needed in the inside-out path either, since:
-  - Tenant VMs do not need to resolve cloud-internal FQDNs (their API access goes through documented IPs / `--cloud` configs in cloud.conf)
-  - Cross-tenant DNS visibility is not a v1 requirement
+**Rationale:** with a provider-only VIP, `charms_openstack/ip.py:resolve_address(INTERNAL)` finds no internal address (the resolved value is `None`) and therefore raises `ValueError`, breaking `identity-service-relation-joined` (and the analogous internal-endpoint registration on every clustered API charm). Supplying a metal-network VIP alongside the provider VIP gives `resolve_address` an internal address to return, and keeps east-west service traffic on the metal network rather than the provider network.
 
-The remaining v1 use case for Designate (FIP DNS auto-registration via the `neutron-api ↔ designate` integration) is informational only — nothing in v1 consumes those records.
+**Status:** Decided (v1). Reconstructed into this doc from the deploy record (no standalone D-020 file).
 
-### v1 implementation
+**Related:** D-003 (network architecture), D-002 (channels).
 
-- Tenant subnets created with `--dns-nameserver 1.1.1.1 --dns-nameserver 1.0.0.1` (or via the openrc `OS_DNS_NAMESERVERS` env)
-- CAPI workload cluster template variable `OPENSTACK_DNS_NAMESERVERS` set to `1.1.1.1,1.0.0.1` (per `v1-do-doc-07-capi-bootstrap.md` §13)
-- Cloud-internal `*.cloud.neumatrix.local` FQDN tree resolved via static `/etc/hosts` on bootstrap-relevant hosts (jumphost, openstack0-3, LXD containers per charm bootstrap, capi-mgmt — staged in `v1-do-doc-05-vault-init.md` §11 and `v1-do-doc-07-capi-bootstrap.md` §6)
-- Charms continue to use FQDN-based `os-public-hostname` (cert SANs depend on it) — internal resolution via `/etc/hosts` is sufficient
+---
 
-### v2 plan
+## D-021: Octavia amphora image pipeline on the no-DNS dual-endpoint deploy
 
-- Re-introduce Designate (charm + designate-bind + relations + hacluster sub)
-- NS delegation from corporate DNS to designate-bind on a real (non-NAT) network VIP
-- Tenant subnets transitioning to use Designate VIP as their resolver (after corporate DNS delegation lands)
-- Designate v2 deploy on a real-network Roosevelt or v2-testcloud topology where the bridging-host complexity from v1 testcloud does not apply
-- D-011 validation re-introduces a calibrated DNS-resolution criterion (mechanism TBD: NS delegation working end-to-end vs static A records at corporate DNS)
+**Decision:** build the amphora image with the charm-native `octavia-diskimage-retrofit` charm, configured with `use-internal-endpoints: true` and seeded by a manually uploaded stock Ubuntu base image carrying the five Glance properties the retrofit reads (architecture, os_distro, os_version, version_name, product_name). Park `glance-simplestreams-sync` (gss) for the amphora pipeline. The amphora image is `image-format: raw`, tagged `octavia-amphora` to match octavia's `amp-image-tag`.
 
-### v2-residency note
+**Root cause:** on the dual-endpoint, no-DNS topology (D-019), metal-net catalog-callers (gss + its retrofit subordinate) cannot reach Glance: the public Glance FQDN does not resolve/route from the metal net, and the internal-endpoint cert carries no FQDN SAN (so an `/etc/hosts` FQDN->metal-VIP mapping fails TLS). gss `use-internal-endpoints` steers only its Keystone auth to internal; its glance/swift clients still use the public FQDN and there is no further charm-native lever -- a charm gap on the no-DNS topology. The retrofit's `use-internal-endpoints` lever DOES cover its build path, so it is the charm-native amphora builder here.
 
-The IPv6 prefixes already imported into NetBox (and marked Reservation status) include allocations that would be appropriate for Designate's VIPs in a v2 design — these stay in NetBox as Reservation until v2 work begins.
+**Status:** Decided + validated end-to-end (v1): the retrofit, over internal endpoints, reads the seeded base and writes the amphora; gss parked; octavia + subordinates active/idle.
+
+**Roosevelt:** cloud-internal DNS + FQDN-valid certs removes the manual seed and fixes gss end to end.
+
+**Related:** D-007 (Octavia inclusion), D-019 (no-DNS root cause).
+
+---
+
+## D-028: Defer the CAPI v1beta2-contract cutover (deploy the single-contract v1beta1 stack)
+
+**Decision:** defer adopting the CAPI v1beta2-CONTRACT generation until upstream ships it correctly for this path; deploy the clean single-contract v1beta1 stack now.
+
+**Context:** while grounding the (then-current) Canonical CK8s workload chart, the chart referenced control-plane/bootstrap kinds at apiVersion v1beta1 while the pinned provider served them only at v1beta2 (DOCFIX-022). The broader question -- is the v1beta2-contract generation available and correct for long-term support on this path -- resolved to "not yet."
+
+**Status:** Decided (v1). The CK8s-chart-specific particulars were subsequently retired when D-031 replaced the direct-CAPI CK8s path with Magnum + the azimuth kubeadm charts; the single-contract principle carries forward, and D-042 later made the driver-side contract axis concrete.
+
+**Builds on:** D-022 / D-023 (do-07-era CAPI/CRD work). **Related:** D-031, D-042.
+
+---
+
+## D-029: Defer Keystone SSO (k8s-keystone-auth) to Roosevelt
+
+**Decision:** Keystone SSO for the workload clusters (the chart's `k8s-keystone-auth` addon) is deferred to the next deployment and folded into the Roosevelt cloud-internal-DNS + trusted-cert foundation. v1 workload clusters run the Kubernetes Dashboard with standard token auth; the `k8sKeystoneAuth` addon stays OFF; SSO is not validated on v1.
+
+**Rationale:** enabling it on v1 would produce a non-functional SSO path (TLS failure to the private-CA Keystone endpoint) plus apiserver webhook error noise -- a checked box that does not work -- and forcing it would require forking the addon or fighting CAAPH, neither of which carries forward to Roosevelt.
+
+**Finding (verified 2026-06-05):** k8s-keystone-auth 1.5.1 exposes no keystone-CA option, so it cannot trust a private-CA Keystone endpoint.
+
+**Status:** Decided (v1). **Related:** D-028 (same "land it on the proper foundation later" principle).
+
+---
+
+## D-030: Management-cluster placement -- in-cloud (superseded twice; see D-033, D-035)
+
+**Decision (as taken 2026-06-06):** run the CAPI management plane IN-CLOUD for the v1 rehearsal (CAPI core + CAPO + cluster-api-addon-provider as VMs on the OpenStack cloud, following an Azimuth seed + HA pattern with a `clusterctl move` pivot to a self-hosted in-cloud management cluster). Out-of-cloud was recorded as a deferred alternative for Roosevelt.
+
+**Status:** SUPERSEDED. First by D-033 (out-of-cloud Canonical `k8s`-charm on MAAS); then -- after D-033's dual-homed node hit an unfixable pod-egress fault -- placement returned in-cloud in a simpler single-homed form under D-035. Retained here for lineage.
+
+**Related:** D-031, D-033, D-035.
+
+---
+
+## D-031: Cluster-creation surface + engine -- Magnum + magnum-capi-helm + azimuth kubeadm charts
+
+**Decision:** the tenant Kubernetes service is built from three layers (surface, driver, engine), plus a management-cluster placement choice:
+- Surface: OpenStack Magnum (`openstack coe cluster ...`), so tenants and operators manage clusters through the OpenStack API.
+- Driver: the in-tree Cluster API Helm driver `magnum-capi-helm` (opendev.org/openstack/magnum-capi-helm), pip-installed into the Magnum conductor and pointed at a CAPI management cluster via `[capi_helm] kubeconfig_file`.
+- Engine: the azimuth-cloud `capi-helm-charts` `openstack-cluster` chart (kubeadm-based: KubeadmControlPlane / KubeadmConfigTemplate + CAPO OpenStackCluster / OpenStackMachineTemplate + MachineDeployment), with addons (Cilium CNI, OpenStack CCM, Cinder CSI, and so on) installed by the cluster-api-addon-provider.
+- Management-cluster placement: in-cloud for v1 (D-030, later refined by D-035).
+
+**Status:** Decided. Supersedes the do-07 direct-CAPI Canonical CK8s chart path; the CK8s-chart-specific findings (DOCFIX-022 ref patch, etc.) are retired for this path.
+
+**Related:** D-030 / D-035 (placement), D-034 (version constellation), D-036 / D-042 (driver/chart/core coherence).
+
+---
+
+## D-033: Management cluster -- out-of-cloud Canonical k8s-charm on MAAS (superseded by D-035)
+
+**Decision (as taken 2026-06-07):** management cluster = a Canonical Kubernetes cluster deployed with the `k8s` / `k8s-worker` machine charms on MAAS, OUTSIDE OpenStack, made HA by the charms; CAPI layer via `clusterctl init --infrastructure openstack` + cluster-api-addon-provider, version-pinned to the capi-helm-charts release (NOT the D-022 do-07 pins); the management cluster does not run the OpenStack CCM for itself (CAPO reaches OpenStack through a `clouds.yaml` pointed at the public API endpoints); lifecycle via Juju.
+
+**Status:** SUPERSEDED by D-035. The chosen node (capi-mgmt MAAS VM) is necessarily dual-homed (MAAS PXE on metal, API VIPs on provider), and pod egress from that multi-NIC node to the API VIPs failed (the Cilium reverse-NAT reply was mis-forwarded out the wrong NIC instead of redirected into the pod). Retained here for lineage.
+
+**Supersedes:** D-030 (placement) + D-032 (azimuth-config tooling). **Builds on:** D-031.
+
+---
+
+## D-034: CAPI version constellation pinned to capi-helm-charts dependencies.json
+
+**Decision:** pin the management-cluster CAPI constellation to the `dependencies.json` published with a chosen `capi-helm-charts` RELEASE TAG, read at deploy time on the jumphost with `jq` (dynamic lookup, no hand-picked versions). Retire D-022 "Option A" (driver 1.3.0 / CAPO v0.10.x / v1alpha6) as obsolete.
+
+**Rationale:** the magnum-capi-helm driver does not hand-pick component versions; its own CI installs the management CAPI stack by reading the per-release `dependencies.json` and running a fixed install sequence -- that file is the single matched-and-tested set. Hand-picking fights the upstream model, and v1alpha6 has been removed from current cluster-api-provider-openstack. (At tag 0.25.1 the set is CAPI v1.13.2, CAPO v0.14.4, cert-manager v1.20.2, ORC v2.5.0, addon-provider 0.12.0, janitor 0.11.0, helm v3.17.3; appendix-B carries the as-built snapshot.)
+
+**Status:** Adopted 2026-06-08. **Supersedes:** D-022. **Amended by:** D-042 (adds the driver<->core contract-coherence rule). **Related:** D-031, D-028 (CRD-contract note, now subsumed).
+
+---
+
+## D-035: Management-cluster placement -- in-cloud single-homed tenant VM
+
+**Decision:** run the CAPI management cluster as a single-homed in-cloud tenant VM (`capi-mgmt-v2`): one NIC on the management tenant subnet (10.20.0.0/24), reached via a floating IP (10.12.7.40); k8s-snap (channel `1.32-classic/stable`), Cilium CNI; not CAPI-self-managed (no `clusterctl move`).
+
+**Rationale:** D-033's out-of-cloud node was necessarily dual-homed and its pod egress to the OpenStack API VIPs failed -- the Cilium reverse-NAT reply was emitted back out the second NIC instead of being redirected into the pod via `cilium_host` (a multi-NIC reverse-path fault; the `k8s` charm exposes too few Cilium annotations to repair it). A single-homed VM removes the second NIC and the fault entirely. The single-NIC pod-egress premise was then proven by the Phase 4 hard gate (an agnhost pod TCP probe to the Keystone VIP 10.12.4.50:5000 returning exitCode 0).
+
+**Status:** Adopted 2026-06-08; pod-egress premise validated. **Supersedes:** D-033 (revisits D-030 in simpler form). **Unaffected:** D-031, D-034.
+
+**Trade-off:** a single-node management cluster is a SPOF with no self-heal -- see D-041 (manual-start policy) and D-040 (the OOM that surfaced it).
+
+---
+
+## D-036: magnum-capi-helm driver / chart / CAPO coherence (resolved)
+
+**Decision / correction:** a mid-session "rebuild Phase 5 on chart 0.10.1" framing -- premised on the GA driver (1.3.0) emitting the v1alpha6 OpenStackCluster CRD and clashing with the modern v1beta1 stack -- is WRONG and is retired. Chart 0.10.1 is the retired v1alpha6 path that D-034 superseded; rebuilding on it would have reversed D-034.
+
+**Verification:** the 1.3.0 driver is api_version-AGNOSTIC (driver.py has zero v1alpha6/v1beta1/apiVersion references; it helm-installs the chart and watches the CAPI `Cluster`, never writing OpenStackCluster directly). The OpenStackCluster apiVersion is set by the CHART: chart 0.25.1 emits `infrastructure.cluster.x-k8s.io/v1beta1`, matching the installed CAPO v0.14.4. The driver's built-in default chart is 0.10.1 (the v1alpha6-era chart); overriding `default_helm_chart_version` to 0.25.1 yields v1beta1. The "1.3.0 emits v1alpha6" claim was true only of the driver's DEFAULT chart, not of the driver pinned to chart 0.25.1.
+
+**Status:** Resolved 2026-06-08. Implements D-031 Phase 3 under the D-034 constellation. NOTE: a SEPARATE axis -- the driver-vs-core CONTRACT, not the chart's CRD string -- is what later required the 1.4.0 driver pin; see D-042. **Related:** D-031, D-034.
+
+---
+
+## D-037: [capi_helm] config persistence on the charm-managed conductor
+
+**Decision:** keep the `[capi_helm]` section in an oslo.config drop-in directory and point the conductor at it: `/etc/magnum/magnum.conf.d/00-capi-helm.conf` (0644, no secrets; it references the 0600 kubeconfig by path), with magnum-conductor launched with `--config-dir /etc/magnum/magnum.conf.d` so oslo.config merges the drop-in over the charm-rendered `magnum.conf`. The charm manages neither the .conf.d directory nor the launch extension, so this survives charm hooks and reproduces on Roosevelt.
+
+**Problem:** the magnum charm (2024.1/stable rev 70) re-renders `magnum.conf` wholesale on hooks and exposes no conf-override option, so a `[capi_helm]` section written into `magnum.conf` would be clobbered.
+
+**Mechanism (load-bearing correction):** the conductor's ExecStart is NOT a direct binary -- it is `/etc/init.d/magnum-conductor systemd-start` (an LSB init script wrapped by systemd), so a systemd ExecStart drop-in appending `--config-dir` is inert (the flag reaches the init script as an ignored positional). The adopted method instead creates `/etc/default/magnum-conductor` (0644; the charm does not manage it) containing `DAEMON_ARGS="$DAEMON_ARGS --config-dir /etc/magnum/magnum.conf.d"`; the init script sources `/etc/default/$NAME` after setting the base `DAEMON_ARGS`, then runs `exec $DAEMON $DAEMON_ARGS`. Verify behaviorally with `/etc/init.d/magnum-conductor show-args` and `ps -ww -C magnum-conductor -o args` (not string-presence).
+
+**Status:** Adopted 2026-06-08 (mechanism revised mid-implementation). **Residual:** breaks silently if a future charm hook writes `/etc/default/magnum-conductor` -- detect via the same show-args/ps check. **Related:** D-031 Phase 3, D-036.
+
+---
+
+## D-040: Raise nova-compute reserved-host-memory on the hyperconverged hosts
+
+**Decision:** set `nova-compute reserved-host-memory` to 8192 MB (from the default 512) so Nova placement accounts for the non-Nova memory co-located on each hyperconverged host. Charm config -> survives redeploy.
+
+**Trigger / root cause:** during the first end-to-end Magnum workload-cluster create, openstack1 hit the kernel OOM-killer (no reboot; single boot since 2026-06-03) and killed a tenant qemu worker VM. The host co-locates nova-compute AND roughly 6 GiB of services invisible to Nova placement (mysqld [innodb-cluster member] ~2.9G, ceph-osd + ceph-mon ~1.2G, neutron workers ~0.7G, nova/apache/cinder/ovs ~1.4G) while Nova reserved only the default 512 MB; under the resulting memory pressure the host swap-thrashed (an ovsdb inactivity-probe storm made the workload API and Juju agent look "down" when the host was in fact thrashing, not down).
+
+**Status:** Adopted + APPLIED 2026-06-09. **Related:** D-035 (the mgmt-VM SPOF the OOM hit), D-041.
+
+---
+
+## D-041: Non-HA deployments default to manual start
+
+**Decision:** non-HA deployments default to MANUAL START -- no automatic VM power-on / auto-recovery is configured by default. Any non-HA deployment must be documented as non-HA, with the rationale that manual-down surfaces incidents (auto-restart masks capacity/health defects). Auto-recovery is an explicit, out-of-band exception, never the silent default.
+
+**Trigger:** after the openstack1 OOM (D-040), CAPI's MachineHealthCheck self-healed the workload worker VMs automatically, but the single-node management VM (capi-mgmt-v2, D-035) was OOM-killed and stayed SHUTOFF -- it does not self-heal or auto-restart, which silently broke magnum reconcile/health and left workload nodes with the CAPI uninitialized taint until it was started by hand. The cost (downtime) was real, but the manual-down is also what forced the investigation that found the OOM root cause, which would otherwise have carried forward into Roosevelt.
+
+**Status:** Adopted 2026-06-09 (policy/governance). **Related:** D-035 (the SPOF), D-040 (the OOM).
+
+---
+
+## D-042: magnum-capi-helm driver must be contract-coherent with the CAPI core
+
+**Decision (amends D-034):** the magnum-capi-helm driver pin (Layer B) MUST be contract-coherent with the CAPI core that `dependencies.json` installs (Layer A). When the Layer-A lockfile is a v1beta2-contract core (CAPI v1.13), the driver pin must be a build that understands v1beta2 references; verify this intersection at deploy.
+
+**Symptom / root cause:** capi-test-1 reached CREATE_COMPLETE with every real component healthy (3 Ready nodes, Calico, CCM/CSI/CoreDNS, API LB ACTIVE), yet magnum reported `health_status = UNHEALTHY` deterministically -- only the `infrastructure` sub-check failed ("Infrastructure resource not found"). The 1.3.0 driver reads `apiVersion` off the Cluster's `spec.infrastructureRef`, but under the v1beta2 contract that ref is version-less, so the health GET resolves nothing. The create path is unaffected (the chart templates the resource versions) -- a cosmetic health false-negative. The governing axis is the CAPI CONTRACT a provider implements toward core, not the CRD apiVersion string (per D-028); rolling back to a v1beta1 core would mean pinning an EOL CAPI for a Roosevelt rehearsal -- the wrong direction.
+
+**Fix:** pin a driver build carrying the per-kind `[capi_helm] api_resources` override and set it so the health lookups use the served versions. As of 2026-06-09, D-042 recorded this capability as UNRELEASED (development series only; released line then 1.1.0/1.2.0/1.3.0), with the interim = a current-series commit for the testcloud and a released-tag pin deferred to Roosevelt.
+
+**Subsequent update (driver-fix work):** the released `magnum-capi-helm==1.4.0` was then confirmed to ship the `api_resources` feature, so the released-tag pin is now available -- v1 pins 1.4.0 with an explicit `api_resources` and targets `health_status = HEALTHY` (installed in phase-07; as-built in appendix-B). This replaces D-042's interim dev-commit path.
+
+**Operational caveat (while any health false-negative persists):** do NOT wire magnum auto-healing to `health_status` -- a persistent false UNHEALTHY could misfire; CAPI MachineHealthCheck handles node healing independently.
+
+**Status:** Adopted 2026-06-09; fix landed via the 1.4.0 pin. **Amends:** D-034. **Related:** D-028 (the contract axis made concrete), D-031, D-035.
 
 ---
 
@@ -438,4 +564,12 @@
 | 2026-05-22 | D-015 v1/v2 fork added; D-004 and D-004a marked v2-scope; D-016 IPv4 tenant pool hybrid model added; D-014 updated with new repo name                                                                                             | v1/v2 fork session                                   |
 | 2026-05-22 | D-017 CAPI bootstrap full-rebuild lifecycle added; D-018 MAAS-release-direct teardown added. D-013 marked superseded by D-018. D-007 Layer B updated to reference D-017 and `runbooks/04a-capi-bootstrap-cluster.md`.             | Teardown planning + handoff session                  |
 | 2026-05-22 | D-002 hacluster row added (channel `2.4/stable`) per Canonical Charm Delivery table, verified against Charmhub. D-007 Layer B driver pin updated: `stackhpc/magnum-capi-helm` v0.13.0 → `openstack/magnum-capi-helm` 1.1.0 (PyPI; stackhpc fork archived Dec 2024). | Caracal channel verification + driver pin correction |
-| 2026-05-27 | D-019 added (DNS scope reduction; Designate deferred to v2). D-008 marked superseded by D-019. D-011 amended to remove "Designate resolves" criterion. | Testcloud topology investigation + v1 scope refinement |
+| 2026-05-22 | D-007 Layer B kubeconfig target corrected: bootstrap k3s → workload cluster (post-pivot per workstream 3b mandatory `clusterctl move`). CAPI mgmt plane paragraph updated accordingly. | Workstream 3 cleanup (post-pivot semantics) |
+| 2026-05-29 | D-019 (Designate deferral) and D-020 (dual provider+metal API VIPs) recorded as already-taken; folded into this doc in the 2026-06-09 consolidation. | Deploy execution / handoff |
+| 2026-05-30 | D-021 Octavia amphora pipeline (charm-native retrofit over internal endpoints; gss parked) added. | Octavia enablement |
+| 2026-06-05 | D-028 (defer v1beta2-contract cutover) and D-029 (defer Keystone SSO) added. | CAPI path research |
+| 2026-06-06 | D-030 (mgmt-cluster placement: in-cloud) and D-031 (Magnum + magnum-capi-helm + azimuth kubeadm engine) added. | Magnum/CAPI surface decisions |
+| 2026-06-07 | D-033 (mgmt cluster: out-of-cloud k8s-charm on MAAS) added; supersedes D-030 and D-032. | Mgmt-cluster shape |
+| 2026-06-08 | D-034 (CAPI constellation pinned to dependencies.json; supersedes D-022), D-035 (in-cloud single-homed mgmt VM; supersedes D-033), D-036 (driver/chart/CAPO coherence resolved), D-037 ([capi_helm] via /etc/default DAEMON_ARGS) added. | In-cloud mgmt pivot |
+| 2026-06-09 | D-040 (reserved-host-memory 8192), D-041 (non-HA manual-start policy), D-042 (driver<->core contract coherence; 1.4.0 pin) added. | OOM incident + driver fix |
+| 2026-06-09 | D-019..D-042 consolidated into this document (15 decisions). Existing D-001..D-018 left intact (em-dash style preserved); the new entries are ASCII. | Repo sanitation / doc refresh |
diff --git a/fix-bundle-add-memcached.py b/fix-bundle-add-memcached.py
deleted file mode 100644
index da7bebb..0000000
--- a/fix-bundle-add-memcached.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python3
-"""
-fix-bundle-add-memcached.py   (BUNDLEFIX-004, part 2)
-
-Adds the `memcached` application AND the
-`nova-cloud-controller:memcache <-> memcached:cache` relation to the Caracal
-bundle, matching the live `juju deploy memcached` + `juju integrate` already
-applied to the running model.
-
-Why: nova-cloud-controller treats `memcache` as a required relation. The Caracal
-rebuild omitted memcached entirely, so a fresh `juju deploy` of the bundle would
-leave nova-cc blocked on "Missing relations: memcache" (no instance scheduling).
-
-App block added (placement to: [lxd:8] = openstack0, where it landed live; metal
-space; latest/stable, the only stable channel for the memcached charm):
-
-  memcached:
-    charm: memcached
-    channel: latest/stable
-    num_units: 1
-    to: [lxd:8]
-    bindings: *internal-bindings
-    constraints: arch=amd64
-
-Relation added:
-  - [nova-cloud-controller:memcache, memcached:cache]
-
-Safe by construction: line edits (preserve anchors/comments/formatting),
-timestamped .bak, unified diff, idempotent, yaml.safe_load verification.
-
-Usage: python3 fix-bundle-add-memcached.py [path/to/bundle.yaml]   (default ./bundle.yaml)
-"""
-import sys, os, difflib, datetime
-
-DEFAULT = "bundle.yaml"
-
-APP_BLOCK = [
-    "",
-    "  # memcached: nova-cloud-controller token/cell caching (BUNDLEFIX-004)",
-    "  memcached:",
-    "    charm: memcached",
-    "    channel: latest/stable",
-    "    num_units: 1",
-    "    to: [lxd:8]",
-    "    bindings: *internal-bindings",
-    "    constraints: arch=amd64",
-    "",
-]
-RELATION_LINE = "  - [nova-cloud-controller:memcache, memcached:cache]"
-
-
-def main():
-    path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT
-    if not os.path.isfile(path):
-        print(f"[ABORT] not found: {path}")
-        return 2
-
-    original = open(path, encoding="utf-8").read()
-    lines = original.splitlines()
-
-    have_app = any(l.strip().startswith("memcached:") for l in lines)
-    have_rel = "memcached:cache" in original
-    if have_app and have_rel:
-        print("[OK/IDEMPOTENT] memcached app and relation already present; no change.")
-        return 0
-    if have_app != have_rel:
-        print(f"[ABORT] partial state (app={have_app}, relation={have_rel}); fix by hand to avoid duplication.")
-        return 3
-
-    # Bundle order here is description -> variables -> machines -> applications -> relations,
-    # so `relations:` is the END of the applications section. Anchor BOTH inserts to it:
-    # the app block goes immediately before `relations:` (last app), the relation immediately after.
-    rel_idx = next((i for i, l in enumerate(lines) if l.rstrip() == "relations:"), None)
-    if rel_idx is None:
-        print("[ABORT] could not find top-level 'relations:' key.")
-        return 4
-
-    out = []
-    for i, l in enumerate(lines):
-        if i == rel_idx:
-            out.extend(APP_BLOCK)        # app block: end of applications (just before relations:)
-        out.append(l)
-        if i == rel_idx:
-            out.append(RELATION_LINE)    # relation: first entry after relations:
-    new = "\n".join(out) + ("\n" if original.endswith("\n") else "")
-
-    print("=== unified diff ===")
-    print("\n".join(difflib.unified_diff(
-        original.splitlines(), new.splitlines(),
-        fromfile=f"{path} (orig)", tofile=f"{path} (new)", lineterm="")))
-
-    try:
-        import yaml
-        d = yaml.safe_load(new)
-        a = d["applications"]
-        rels = d.get("relations", [])
-        assert "memcached" in a, "memcached app missing after edit"
-        assert a["memcached"].get("charm") == "memcached", "charm != memcached"
-        assert a["memcached"].get("bindings") == {"": "metal"}, f"bindings={a['memcached'].get('bindings')}"
-        mc = [r for r in rels if any("memcache" in str(x) for x in r)]
-        assert mc, "memcache relation missing after edit"
-        print(f"[VERIFY] OK: memcached app present, bindings {{'': 'metal'}}, relation {mc}")
-        print(f"[VERIFY] totals now: apps={len(a)} relations={len(rels)}")
-    except ImportError:
-        print("[WARN] PyYAML missing; skipped semantic verify (re-verify on jumphost after pull).")
-    except Exception as e:
-        print(f"[ABORT] verification failed: {e}")
-        return 5
-
-    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-    bak = f"{path}.bak-{ts}"
-    open(bak, "w", encoding="utf-8").write(original)
-    open(path, "w", encoding="utf-8").write(new)
-    print(f"[WROTE] {path}  (backup: {bak})")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/fix-bundle-haclusters.py b/fix-bundle-haclusters.py
deleted file mode 100644
index e3d70e5..0000000
--- a/fix-bundle-haclusters.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/env python3
-"""
-fix-bundle-haclusters.py - BUNDLEFIX-003
-
-Add `options: { cluster_count: 1 }` to the 10 *active* testcloud haclusters so
-the committed bundle matches the running model (we already set this at runtime
-via `juju config`). Single-unit principals on the testcloud cannot form the
-default 3-peer cluster; cluster_count=1 lets a 1-node cluster form and bring up
-the (reachable, public->provider) VIP. Roosevelt's separate 3-unit bundle keeps
-the default.
-
-Text/line based - never round-trips YAML, so anchors/comments/formatting are
-preserved. Only touches the named, *uncommented* hacluster lines; the commented
-v2-deferred ones (vault-hacluster, ceph-radosgw-hacluster, designate-hacluster)
-are left untouched. Idempotent: skips a line that already has cluster_count, and
-aborts cleanly if nothing needs changing.
-
-Usage: python3 fix-bundle-haclusters.py [path-to-bundle.yaml]   (default ./bundle.yaml)
-"""
-import sys, os, re, shutil, difflib, datetime
-
-PATH = sys.argv[1] if len(sys.argv) > 1 else "bundle.yaml"
-HACLUSTERS = ["keystone", "glance", "nova-cloud-controller", "neutron-api",
-              "cinder", "octavia", "barbican", "magnum", "placement",
-              "openstack-dashboard"]
-INSERT_AFTER = "channel: 2.4/stable }"
-INSERT_WITH = "channel: 2.4/stable, options: { cluster_count: 1 } }"
-
-
-def abort(msg):
-    sys.stderr.write("ABORT (no changes written): %s\n" % msg)
-    sys.exit(1)
-
-
-if not os.path.isfile(PATH):
-    abort("file not found: %s (run from the repo root, or pass the path)" % PATH)
-
-with open(PATH, "r", newline="") as fh:
-    orig = fh.readlines()
-lines = list(orig)
-
-changed = []
-for name in HACLUSTERS:
-    # uncommented inline def line for this hacluster
-    pat = re.compile(r'^\s*%s-hacluster:\s*\{\s*charm:\s*hacluster' % re.escape(name))
-    hits = [i for i, ln in enumerate(lines)
-            if pat.match(ln) and not ln.lstrip().startswith("#")]
-    if len(hits) != 1:
-        abort("expected exactly 1 uncommented '%s-hacluster' inline def, found %d"
-              % (name, len(hits)))
-    i = hits[0]
-    if "cluster_count" in lines[i]:
-        abort("%s-hacluster already has cluster_count - already applied? inspect."
-              % name)
-    if INSERT_AFTER not in lines[i]:
-        abort("%s-hacluster line not in expected inline shape: %r"
-              % (name, lines[i].strip()))
-    lines[i] = lines[i].replace(INSERT_AFTER, INSERT_WITH, 1)
-    changed.append(name)
-
-if len(changed) != len(HACLUSTERS):
-    abort("only changed %d of %d haclusters" % (len(changed), len(HACLUSTERS)))
-
-ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-bak = "%s.bak-%s" % (PATH, ts)
-shutil.copy2(PATH, bak)
-with open(PATH, "w", newline="") as fh:
-    fh.writelines(lines)
-
-print("Backup written: %s" % bak)
-print("=== unified diff ===")
-sys.stdout.writelines(difflib.unified_diff(
-    orig, lines, fromfile="bundle.yaml (before)", tofile="bundle.yaml (after)"))
-print("")
-
-try:
-    import yaml
-except Exception:
-    print("NOTE: PyYAML not importable - semantic verification skipped; re-verify on jumphost.")
-    sys.exit(0)
-
-apps = yaml.safe_load(open(PATH))["applications"]
-print("=== verification ===")
-print("YAML parses: PASS")
-ok = True
-for name in HACLUSTERS:
-    a = apps.get("%s-hacluster" % name, {})
-    cc = (a.get("options") or {}).get("cluster_count")
-    p = (cc == 1)
-    ok &= p
-    print("  %-30s cluster_count==1 : %s" % (name + "-hacluster", "PASS" if p else "FAIL (%r)" % cc))
-# deferred ones must NOT have appeared
-for absent in ("vault-hacluster", "ceph-radosgw-hacluster", "designate-hacluster"):
-    p = absent not in apps
-    ok &= p
-    print("  %-30s stays absent     : %s" % (absent, "PASS" if p else "FAIL"))
-print("\nRESULT:", "ALL CHECKS PASS"
-      if ok else "FAILURES - revert: cp %s %s" % (bak, PATH))
-sys.exit(0 if ok else 2)
diff --git a/fix-bundle-metal-vips.py b/fix-bundle-metal-vips.py
deleted file mode 100644
index 0ac5049..0000000
--- a/fix-bundle-metal-vips.py
+++ /dev/null
@@ -1,125 +0,0 @@
-#!/usr/bin/env python3
-"""
-BUNDLEFIX-006 (D-020): append the metal HA VIP to each clustered API charm's `vip` option.
-
-For every line of the form `vip: 10.12.4.<N>` where N is in the reserved provider API-VIP
-range (224..254), rewrite it to `vip: "10.12.4.<N> 10.12.8.<N>"` so the charm advertises a
-provider VIP (public endpoint) AND a metal VIP (internal/admin endpoints). This is the
-spaces-native dual-VIP fix validated live on placement: internal/admin bindings = metal, so
-resolve_address matches the metal VIP; public binding = provider, matches the provider VIP.
-No binding/anchor change and no os-*-network needed.
-
-Safety properties (same pattern as the prior fix scripts):
-  - pure line edit; never round-trips YAML, so anchors/aliases/comments are preserved
-  - STRICT match: only single `10.12.4.<224-254>` values are rewritten; anything else (already
-    dual, out of range, unexpected format) is left untouched -> fail-safe, never mangles
-  - idempotent: lines already carrying a `10.12.4.x 10.12.8.x` pair are skipped
-  - timestamped .bak, unified diff to stdout, and a best-effort yaml.safe_load semantic check
-    (skipped where PyYAML is absent, e.g. the Windows workstation; the jumphost re-verifies)
-"""
-import sys
-import re
-import datetime
-import shutil
-import difflib
-
-PROVIDER_NET = "10.12.4."
-METAL_NET = "10.12.8."
-VIP_LO, VIP_HI = 224, 254  # reserved API-VIP range (same last-octet on both nets)
-
-VIP_LINE = re.compile(r'^(?P<indent>\s*)vip:\s*(?P<q>["\']?)(?P<val>[^"\'\n]*)(?P=q)\s*$')
-SINGLE = re.compile(r'^10\.12\.4\.(\d+)$')
-DOUBLE = re.compile(r'^10\.12\.4\.(\d+)\s+10\.12\.8\.(\d+)$')
-
-
-def main():
-    if len(sys.argv) != 2:
-        print("usage: fix-bundle-metal-vips.py <bundle.yaml>")
-        return 2
-    path = sys.argv[1]
-    try:
-        with open(path) as f:
-            original = f.read()
-    except OSError as e:
-        print(f"[ABORT] cannot read {path}: {e}")
-        return 3
-
-    lines = original.split("\n")
-    changed = 0
-    skipped_already = 0
-    untouched_unexpected = []
-
-    out = []
-    for l in lines:
-        m = VIP_LINE.match(l)
-        if m:
-            val = m.group("val").strip()
-            if DOUBLE.match(val):
-                skipped_already += 1
-                out.append(l)
-                continue
-            sm = SINGLE.match(val)
-            if sm:
-                octet = int(sm.group(1))
-                if VIP_LO <= octet <= VIP_HI:
-                    out.append(f'{m.group("indent")}vip: "{PROVIDER_NET}{octet} {METAL_NET}{octet}"')
-                    changed += 1
-                    continue
-            # vip line, but not a single in-range provider VIP -> leave alone, but note it
-            untouched_unexpected.append(val)
-        out.append(l)
-
-    if untouched_unexpected:
-        print(f"[NOTE] {len(untouched_unexpected)} vip line(s) left untouched (unexpected value/range): "
-              f"{untouched_unexpected}")
-
-    if changed == 0:
-        if skipped_already:
-            print(f"[OK/IDEMPOTENT] {skipped_already} vip line(s) already carry a metal VIP; no change.")
-            return 0
-        print("[ABORT] found no `vip: 10.12.4.224-254` lines to update.")
-        return 4
-
-    new = "\n".join(out)
-    if original.endswith("\n") and not new.endswith("\n"):
-        new += "\n"
-
-    print("=== unified diff ===")
-    sys.stdout.writelines(difflib.unified_diff(
-        original.splitlines(keepends=True),
-        new.splitlines(keepends=True),
-        fromfile=f"{path} (orig)", tofile=f"{path} (new)"))
-
-    try:
-        import yaml
-        d = yaml.safe_load(new)
-        apps = d.get("applications", {}) or {}
-        dual = sorted(
-            a for a, c in apps.items()
-            if isinstance(c, dict) and isinstance(c.get("options"), dict)
-            and isinstance(c["options"].get("vip"), str)
-            and len(c["options"]["vip"].split()) == 2
-        )
-        print(f"\n[VERIFY] yaml parses OK; {len(dual)} charm(s) now have a 2-address vip:")
-        for a in dual:
-            print(f"           {a}: {apps[a]['options']['vip']}")
-    except ImportError:
-        print("\n[VERIFY] PyYAML not present (Windows workstation) - semantic check skipped; "
-              "jumphost will re-verify after pull.")
-    except Exception as e:
-        print(f"\n[ABORT] yaml verify failed, not writing: {e}")
-        return 5
-
-    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-    bak = f"{path}.bak-{ts}"
-    shutil.copy2(path, bak)
-    with open(path, "w") as f:
-        f.write(new)
-    print(f"\n[WROTE] {path}  (backup: {bak})")
-    print(f"[SUMMARY] updated {changed} vip line(s); {skipped_already} already dual; "
-          f"{len(untouched_unexpected)} untouched.")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/fix-bundle-router-bindings.py b/fix-bundle-router-bindings.py
deleted file mode 100644
index 8fc8f0b..0000000
--- a/fix-bundle-router-bindings.py
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/usr/bin/env python3
-"""
-fix-bundle-router-bindings.py   (BUNDLEFIX-005, part 2 / option A)
-
-Adds `bindings: *internal-bindings` to every mysql-router application block in the
-Caracal bundle, so the router subordinates bind to the metal space -- matching the
-live `juju bind <router> metal` fix already applied to the running model.
-
-Why: without an explicit binding the routers default to the empty 'alpha' space,
-which resolves to the container's PROVIDER address. The cluster then grants
-mysqlrouteruser@<provider-addr>, but the router's actual TCP connection to the
-metal-only cluster egresses the metal interface -> grant host != source ->
-"Access denied 1045" -> mysqlrouter never bootstraps. Binding to metal makes the
-advertised address == the connection source.
-
-Safe by construction:
-  - pure line edits (NO YAML round-trip; preserves anchors, comments, formatting)
-  - timestamped .bak
-  - prints a unified diff
-  - idempotent (skips any router that already carries a bindings line)
-  - yaml.safe_load verification of the result, asserting every mysql-router app
-    resolves to bindings {'': 'metal'} via the *internal-bindings anchor
-  - aborts unless it finds the expected mysql-router blocks and they verify
-
-Usage:
-  python3 fix-bundle-router-bindings.py [path/to/bundle.yaml]   (default ./bundle.yaml)
-"""
-import sys, os, difflib, datetime
-
-DEFAULT = "bundle.yaml"
-
-
-def transform(lines):
-    """Insert `<indent>bindings: *internal-bindings` after the channel line of
-    every `charm: mysql-router` app block that doesn't already have a bindings line."""
-    out = []
-    prev_is_mr_charm = False
-    found = inserted = skipped = 0
-    for idx, line in enumerate(lines):
-        out.append(line)
-        stripped = line.strip()
-        if prev_is_mr_charm and stripped.startswith("channel:"):
-            found += 1
-            nxt = lines[idx + 1].strip() if idx + 1 < len(lines) else ""
-            if nxt.startswith("bindings:"):
-                skipped += 1
-            else:
-                indent = line[: len(line) - len(line.lstrip())]
-                out.append(f"{indent}bindings: *internal-bindings")
-                inserted += 1
-        prev_is_mr_charm = (stripped == "charm: mysql-router")
-    return out, found, inserted, skipped
-
-
-def main():
-    path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT
-    if not os.path.isfile(path):
-        print(f"[ABORT] not found: {path}")
-        return 2
-
-    with open(path, "r", encoding="utf-8") as f:
-        original = f.read()
-    lines = original.splitlines()
-
-    out, found, inserted, skipped = transform(lines)
-    new = "\n".join(out) + ("\n" if original.endswith("\n") else "")
-
-    if found == 0:
-        print("[ABORT] no `charm: mysql-router` + `channel:` blocks found - unexpected structure.")
-        return 3
-    if inserted == 0 and skipped == found:
-        print(f"[OK/IDEMPOTENT] all {found} mysql-router apps already bound; no change.")
-        return 0
-
-    print("=== unified diff ===")
-    diff = "\n".join(difflib.unified_diff(
-        original.splitlines(), new.splitlines(),
-        fromfile=f"{path} (orig)", tofile=f"{path} (new)", lineterm=""))
-    print(diff or "(no diff)")
-    print(f"=== mysql-router blocks: {found} | inserted: {inserted} | already-bound: {skipped} ===")
-
-    # semantic verification (anchors resolve under safe_load)
-    try:
-        import yaml
-        doc = yaml.safe_load(new)
-        apps = (doc or {}).get("applications", {}) or {}
-        mr = {k: v for k, v in apps.items()
-              if isinstance(v, dict) and v.get("charm") == "mysql-router"}
-        bad = {k: v.get("bindings") for k, v in mr.items() if v.get("bindings") != {"": "metal"}}
-        if bad:
-            print(f"[ABORT] verification failed; not bound to {{'': 'metal'}}: {bad}")
-            return 4
-        print(f"[VERIFY] yaml.safe_load OK; all {len(mr)} mysql-router apps -> bindings {{'': 'metal'}}.")
-    except ImportError:
-        print("[WARN] PyYAML missing; skipped semantic verify (re-verify on jumphost after pull).")
-    except Exception as e:
-        print(f"[ABORT] yaml verification error: {e}")
-        return 5
-
-    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-    bak = f"{path}.bak-{ts}"
-    with open(bak, "w", encoding="utf-8") as f:
-        f.write(original)
-    with open(path, "w", encoding="utf-8") as f:
-        f.write(new)
-    print(f"[WROTE] {path}  (backup: {bak})")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/fix-bundle-v1.py b/fix-bundle-v1.py
deleted file mode 100644
index 7c3200b..0000000
--- a/fix-bundle-v1.py
+++ /dev/null
@@ -1,187 +0,0 @@
-#!/usr/bin/env python3
-"""
-fix-bundle-v1.py - Option-A bundle fix for the Caracal v1 deploy.
-
-SUPERSEDES fix-api-bindings.py (which did only edits 1-2). Run this against the
-*original* bundle.yaml (i.e. after `git restore bundle.yaml` if the earlier
-script was already applied).
-
-All edits are text/line based - YAML is never round-tripped, so anchors,
-aliases, comments and formatting are preserved. Verification uses safe_load
-(read only). Aborts WITHOUT writing if the bundle is not in the expected
-pre-fix shape.
-
-Edits:
-  1. Shrink &api-bindings to the two keys that carry meaning:
-        "":     metal
-        public: provider
-     (drops admin/internal/shared-db/amqp/certificates/cluster/ha - all 'metal',
-      i.e. the "" default - which were causing 'unknown endpoint' deploy errors
-      on keystone / ceph-radosgw / openstack-dashboard.)
-  2. vault bindings: *api-bindings -> *internal-bindings (vault is internal-only).
-  3. Remove vault's options: block (vip + os-public-hostname) - vault is a
-     single unit on the testcloud (3 at Roosevelt); a provider VIP is both
-     unreachable from metal-bound vault and pointless at one unit.
-  4. Comment out the vault-hacluster subordinate.
-  5. Comment out the [vault:ha, vault-hacluster:ha] relation.
-
-Net effect on counts: VIPs 11->10, apps 51->50, relations 98->97.
-Vault HA is restored at Roosevelt where it is genuinely 3-unit with a real
-(metal) VIP from NetBox.
-
-Usage: python3 fix-bundle-v1.py [path-to-bundle.yaml]   (default ./bundle.yaml)
-"""
-import sys, os, re, shutil, difflib, datetime
-
-PATH = sys.argv[1] if len(sys.argv) > 1 else "bundle.yaml"
-DROP = {"admin", "internal", "shared-db", "amqp", "certificates", "cluster", "ha"}
-KEEP = {'""', "public"}
-VAULT_OPTS_EXPECTED = {"vip", "os-public-hostname"}
-
-
-def abort(msg):
-    sys.stderr.write("ABORT (no changes written): %s\n" % msg)
-    sys.exit(1)
-
-
-def indent_of(line):
-    return len(line) - len(line.lstrip())
-
-
-if not os.path.isfile(PATH):
-    abort("file not found: %s (run from the repo root, or pass the path)" % PATH)
-
-with open(PATH, "r", newline="") as fh:
-    orig = fh.readlines()
-lines = list(orig)
-
-# ---------- Edit 1: shrink &api-bindings ----------
-anchor = next((i for i, ln in enumerate(lines)
-               if re.match(r'^api-bindings:\s*&api-bindings$', ln.strip())), None)
-if anchor is None:
-    abort("could not locate 'api-bindings: &api-bindings'")
-a_indent = indent_of(lines[anchor])
-j, kept, dropped = anchor + 1, [], []
-while j < len(lines):
-    raw = lines[j]
-    if raw.strip() == "" or indent_of(raw) <= a_indent:
-        break
-    key = raw.strip().split(":", 1)[0].strip()
-    (dropped if key in DROP else kept).append(key if key in DROP else raw)
-    j += 1
-kept_keys = {l.strip().split(":", 1)[0].strip() for l in kept}
-if kept_keys != KEEP or set(dropped) != DROP:
-    abort("api-bindings not in expected pre-fix shape (kept=%s dropped=%s)"
-          % (sorted(kept_keys), sorted(dropped)))
-lines = lines[:anchor + 1] + kept + lines[j:]
-
-# ---------- locate vault app block (post edit-1 indices) ----------
-vault = next((i for i, ln in enumerate(lines)
-              if ln.strip() == "vault:" and indent_of(ln) == 2), None)
-if vault is None:
-    abort("could not locate '  vault:' application block")
-# block end = next line at indent <= 2 (non-blank)
-vend = vault + 1
-while vend < len(lines):
-    if lines[vend].strip() and indent_of(lines[vend]) <= 2:
-        break
-    vend += 1
-
-# ---------- Edit 2: vault bindings -> internal-bindings ----------
-b_fixed = False
-for k in range(vault + 1, vend):
-    if re.match(r'^\s*bindings:\s*\*api-bindings\b', lines[k]):
-        lines[k] = lines[k].replace("*api-bindings", "*internal-bindings", 1)
-        b_fixed = True
-        break
-if not b_fixed:
-    abort("vault 'bindings: *api-bindings' not found (already changed?)")
-
-# ---------- Edit 3: remove vault options: block ----------
-opt = next((k for k in range(vault + 1, vend)
-            if re.match(r'^\s{4}options:\s*$', lines[k])), None)
-if opt is None:
-    abort("vault 'options:' line not found")
-opt_indent = indent_of(lines[opt])
-c = opt + 1
-opt_children = []
-while c < vend and lines[c].strip() and indent_of(lines[c]) > opt_indent:
-    opt_children.append(lines[c].strip().split(":", 1)[0].strip())
-    c += 1
-if set(opt_children) != VAULT_OPTS_EXPECTED:
-    abort("vault options are %s, expected %s - inspect by hand (won't blind-delete)"
-          % (sorted(opt_children), sorted(VAULT_OPTS_EXPECTED)))
-del lines[opt:c]          # remove 'options:' + its children
-
-# ---------- Edit 4: comment out the vault-hacluster subordinate ----------
-hac = [i for i, ln in enumerate(lines)
-       if re.match(r'^\s*vault-hacluster:', ln) and not ln.lstrip().startswith("#")]
-if len(hac) != 1:
-    abort("expected exactly 1 uncommented 'vault-hacluster:' line, found %d" % len(hac))
-i = hac[0]
-ind = indent_of(lines[i])
-lines[i] = lines[i][:ind] + "# " + lines[i][ind:]
-
-# ---------- Edit 5: comment out the vault:ha relation ----------
-rel = [i for i, ln in enumerate(lines)
-       if ("vault:ha" in ln and "vault-hacluster:ha" in ln
-           and not ln.lstrip().startswith("#"))]
-if len(rel) != 1:
-    abort("expected exactly 1 uncommented vault:ha relation, found %d" % len(rel))
-i = rel[0]
-ind = indent_of(lines[i])
-lines[i] = lines[i][:ind] + "# " + lines[i][ind:]
-
-# ---------- Backup + write ----------
-ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-bak = "%s.bak-%s" % (PATH, ts)
-shutil.copy2(PATH, bak)
-with open(PATH, "w", newline="") as fh:
-    fh.writelines(lines)
-
-print("Backup written: %s" % bak)
-print("=== unified diff ===")
-sys.stdout.writelines(difflib.unified_diff(
-    orig, lines, fromfile="bundle.yaml (before)", tofile="bundle.yaml (after)"))
-print("")
-
-# ---------- Verify ----------
-try:
-    import yaml
-except Exception:
-    print("NOTE: PyYAML not importable - semantic verification skipped; re-verify on jumphost.")
-    sys.exit(0)
-
-doc = yaml.safe_load(open(PATH))
-apps = doc["applications"]
-rels = doc.get("relations") or []
-MIN = {"": "metal", "public": "provider"}
-INT = {"": "metal"}
-on_min = ["keystone", "ceph-radosgw", "openstack-dashboard", "octavia", "glance",
-          "nova-cloud-controller", "placement", "neutron-api", "cinder",
-          "barbican", "magnum"]
-print("=== verification ===")
-print("YAML parses: PASS")
-ok = True
-for a in on_min:
-    p = apps.get(a, {}).get("bindings") == MIN
-    ok &= p
-    print("  %-22s minimal api-bindings : %s" % (a, "PASS" if p else "FAIL"))
-checks = [
-    ("vault bindings == internal-bindings", apps.get("vault", {}).get("bindings") == INT),
-    ("vault has no options block", apps.get("vault", {}).get("options") in (None, {})),
-    ("vault-hacluster removed from apps", "vault-hacluster" not in apps),
-    ("vault:ha relation removed", not any("vault:ha" in pair for pair in rels)),
-]
-for desc, p in checks:
-    ok &= p
-    print("  %-34s : %s" % (desc, "PASS" if p else "FAIL"))
-nvip = sum(1 for ap in apps.values()
-           if isinstance(ap, dict) and isinstance(ap.get("options"), dict)
-           and str(ap["options"].get("vip", "")).startswith("10.12.4."))
-pv = (nvip == 10)
-ok &= pv
-print("  %-34s : %s" % ("VIP count == 10 (was 11)", "PASS (%d)" % nvip if pv else "FAIL (%d)" % nvip))
-print("\nRESULT:", "ALL CHECKS PASS"
-      if ok else "FAILURES - revert: cp %s %s" % (bak, PATH))
-sys.exit(0 if ok else 2)
diff --git a/overlays/vr0-dc0-testcloud.yaml b/overlays/vr0-dc0-testcloud.yaml
deleted file mode 100644
index f7c8c79..0000000
--- a/overlays/vr0-dc0-testcloud.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Testcloud overlay for VR0 DC0 Omega Cloud
-#
-# STATUS: PLACEHOLDER — drafted alongside bundle.yaml.
-#
-# This overlay pins values specific to the 4-VM KVM testcloud at jumphost
-# vopenstack-jesse. Roosevelt bare-metal would use a different overlay
-# (overlays/roosevelt-prod.yaml — not in this repo) that swaps num_units to 3+,
-# adjusts machine constraints to MAAS tags, and removes any KVM-specific
-# config tuned for libvirt bridges.
-#
-# Per D-009, hacluster relations remain in the main bundle.yaml even though
-# num_units=1 on testcloud. The overlay only changes num_units, not the
-# relation graph.
-#
-# TODO during bundle drafting:
-#   - [ ] num_units=1 overrides per API charm
-#   - [ ] machine constraints (system-id pinning for openstack0-3)
-#   - [ ] bridge-interface-mappings for libvirt virbr1 (provider)
-#   - [ ] storage-backend config for cinder/glance pointing at Ceph
-#   - [ ] Octavia lb-mgmt-* network values (per LBaaS Management VLAN/prefix)
diff --git a/review-bundle.py b/review-bundle.py
deleted file mode 100644
index 665b1fa..0000000
--- a/review-bundle.py
+++ /dev/null
@@ -1,546 +0,0 @@
-#!/usr/bin/env python3
-"""
-review-bundle.py -- comprehensive pre-deploy review of the Charmed OpenStack
-Caracal 2024.1 IPv4-only bundle (VR0 / DC0 / Omega test cloud).
-
-READ-ONLY. Encodes every lesson learned from the 2026-05-28/29/30 deploy
-sessions as a fail-closed check. Superset of audit-bundle-fixes.py.
-
-Severities:
-  FAIL  deploy-blocker or known regression       -> exit 1
-  WARN  review item / possible issue             -> exit 1 only under --strict
-  INFO  informational summary                    -> never affects exit
-
-Dependencies: PyYAML only (already used by the existing fix scripts); rest stdlib.
-ASCII-only output by design (non-ASCII has caused silent daemon failures here).
-
-Usage:
-  python3 review-bundle.py [BUNDLE] [--strict] [--quiet]
-    BUNDLE   path to bundle.yaml (default: ./bundle.yaml)
-    --strict treat WARN as failing for exit code
-    --quiet  suppress PASS/INFO lines (show only WARN/FAIL)
-"""
-
-import sys
-import argparse
-import ipaddress
-
-try:
-    import yaml
-except ImportError:
-    sys.stderr.write("ERROR: PyYAML not installed (pip install pyyaml --break-system-packages)\n")
-    sys.exit(2)
-
-# --------------------------------------------------------------------------- #
-# Config -- the known-good baseline. Adjust here if the design changes.
-# --------------------------------------------------------------------------- #
-EXPECTED_APPS = 51
-EXPECTED_RELATIONS = 98
-
-PROVIDER_NET = ipaddress.ip_network("10.12.4.0/22")
-METAL_NET = ipaddress.ip_network("10.12.8.0/22")
-VIP_OCTET_MIN = 224          # MAAS reserved metal VIP range 10.12.8.224-254 (D-020)
-VIP_OCTET_MAX = 254
-
-# BUNDLEFIX-001: the 7 per-endpoint binding keys that were phantom and removed.
-# Final anchors are {"":metal} and {"":metal, public:provider} -> none of these
-# should reappear in any app's effective bindings.
-PHANTOM_BINDING_KEYS = {
-    "admin", "internal", "shared-db", "amqp", "certificates", "cluster", "ha",
-}
-
-# D-020 clustered-API charm -> provider VIP last octet (metal mirrors it).
-EXPECTED_CLUSTERED = {
-    "barbican": 224, "cinder": 226, "glance": 228, "keystone": 229,
-    "magnum": 230, "neutron-api": 231, "nova-cloud-controller": 232,
-    "octavia": 233, "openstack-dashboard": 234, "placement": 235,
-}
-
-# Verified Caracal channel matrix (from prior charmhub verification).
-# WARN-only: channels can be intentionally pinned; flag deviation, do not block.
-OPENSTACK_CORE_CHANNEL = "2024.1/stable"
-OPENSTACK_CORE_CHARMS = {
-    "keystone", "glance", "cinder", "cinder-ceph", "nova-cloud-controller",
-    "nova-compute", "neutron-api", "neutron-api-plugin-ovn", "placement",
-    "octavia", "barbican", "magnum", "magnum-dashboard", "openstack-dashboard",
-}
-CHANNEL_MATRIX = {
-    "ovn-central": "24.03/stable", "ovn-chassis": "24.03/stable",
-    "ceph-mon": "squid/stable", "ceph-osd": "squid/stable",
-    "ceph-fs": "squid/stable", "ceph-radosgw": "squid/stable",
-    "mysql-innodb-cluster": "8.0/stable", "mysql-router": "8.0/stable",
-    "rabbitmq-server": "3.9/stable", "vault": "1.8/stable",
-}
-EXPECTED_BASE = "ubuntu@22.04"   # jammy; Caracal-bundle paradigm (not noble)
-
-MAC_RE = None  # compiled below
-import re
-MAC_RE = re.compile(r"([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}")
-
-# --------------------------------------------------------------------------- #
-# Duplicate-key-detecting YAML loader (PyYAML silently keeps the last dup).
-# --------------------------------------------------------------------------- #
-_DUP_KEYS = []
-
-
-class DupKeyLoader(yaml.SafeLoader):
-    def construct_mapping(self, node, deep=False):
-        seen = set()
-        for key_node, _ in node.value:
-            try:
-                key = self.construct_object(key_node, deep=deep)
-            except Exception:
-                continue
-            if isinstance(key, (str, int, float, bool)) or key is None:
-                if key in seen:
-                    _DUP_KEYS.append((str(key), key_node.start_mark.line + 1))
-                seen.add(key)
-        return super().construct_mapping(node, deep)
-
-
-# --------------------------------------------------------------------------- #
-# Reporter
-# --------------------------------------------------------------------------- #
-class Reporter:
-    def __init__(self, quiet=False):
-        self.quiet = quiet
-        self.rows = []  # (section, level, code, msg)
-        self.counts = {"PASS": 0, "WARN": 0, "FAIL": 0, "INFO": 0}
-
-    def add(self, section, level, code, msg):
-        self.rows.append((section, level, code, msg))
-        self.counts[level] = self.counts.get(level, 0) + 1
-
-    def emit(self):
-        section = None
-        for sec, level, code, msg in self.rows:
-            if self.quiet and level in ("PASS", "INFO"):
-                continue
-            if sec != section:
-                print("\n--- %s ---" % sec)
-                section = sec
-            print("  [%-4s] %-10s %s" % (level, code, msg))
-        print("\n==================== SUMMARY ====================")
-        print("  PASS=%d  WARN=%d  FAIL=%d  INFO=%d"
-              % (self.counts["PASS"], self.counts["WARN"],
-                 self.counts["FAIL"], self.counts["INFO"]))
-
-
-# --------------------------------------------------------------------------- #
-# Helpers
-# --------------------------------------------------------------------------- #
-def ep_app(endpoint):
-    """'keystone:shared-db' -> 'keystone'. Non-str -> None."""
-    if not isinstance(endpoint, str):
-        return None
-    return endpoint.split(":", 1)[0]
-
-
-def in_net(addr, net):
-    try:
-        return ipaddress.ip_address(addr) in net
-    except ValueError:
-        return False
-
-
-# --------------------------------------------------------------------------- #
-# Checks
-# --------------------------------------------------------------------------- #
-def check_ascii(R, text):
-    sec = "0. Structure / integrity"
-    bad = []
-    for i, line in enumerate(text.splitlines(), 1):
-        for ch in line:
-            if ord(ch) > 127:
-                bad.append((i, repr(ch)))
-                break
-    if bad:
-        for ln, ch in bad[:20]:
-            R.add(sec, "WARN", "NON-ASCII",
-                  "non-ASCII char %s on line %d (non-ASCII has caused silent daemon failures here)" % (ch, ln))
-        if len(bad) > 20:
-            R.add(sec, "WARN", "NON-ASCII", "...and %d more non-ASCII line(s)" % (len(bad) - 20))
-    else:
-        R.add(sec, "PASS", "ASCII", "file is pure ASCII")
-
-
-def check_structure(R, doc):
-    sec = "0. Structure / integrity"
-    if not isinstance(doc, dict):
-        R.add(sec, "FAIL", "STRUCT-00", "top-level YAML is not a mapping")
-        return None, None
-    if _DUP_KEYS:
-        for k, ln in _DUP_KEYS:
-            R.add(sec, "FAIL", "DUPKEY", "duplicate key '%s' near line %d" % (k, ln))
-    else:
-        R.add(sec, "PASS", "DUPKEY", "no duplicate keys")
-
-    apps = doc.get("applications")
-    rels = doc.get("relations")
-    if not isinstance(apps, dict):
-        R.add(sec, "FAIL", "STRUCT-APPS", "no 'applications' mapping")
-        apps = {}
-    if not isinstance(rels, list):
-        R.add(sec, "FAIL", "STRUCT-RELS", "no 'relations' list")
-        rels = []
-
-    na, nr = len(apps), len(rels)
-    R.add(sec, "INFO" if na == EXPECTED_APPS else "WARN", "APP-COUNT",
-          "applications=%d (baseline %d)" % (na, EXPECTED_APPS))
-    R.add(sec, "INFO" if nr == EXPECTED_RELATIONS else "WARN", "REL-COUNT",
-          "relations=%d (baseline %d)" % (nr, EXPECTED_RELATIONS))
-    return apps, rels
-
-
-def check_relations(R, apps, rels):
-    sec = "1. Relation integrity"
-    bad_shape = miss_colon = dangling = 0
-    for r in rels:
-        if not (isinstance(r, list) and len(r) == 2):
-            R.add(sec, "FAIL", "REL-SHAPE", "relation not a 2-element list: %r" % (r,))
-            bad_shape += 1
-            continue
-        for e in r:
-            if not isinstance(e, str) or ":" not in e:
-                R.add(sec, "FAIL", "REL-COLON", "endpoint missing colon: %r in %r" % (e, r))
-                miss_colon += 1
-            else:
-                a = ep_app(e)
-                if a not in apps:
-                    R.add(sec, "FAIL", "REL-DANGLE",
-                          "endpoint references unknown app '%s' in %r" % (a, r))
-                    dangling += 1
-    if not (bad_shape or miss_colon or dangling):
-        R.add(sec, "PASS", "REL-INT",
-              "all relations well-formed, colon-explicit, both ends resolve to apps")
-
-
-def check_bindings_phantom(R, apps):
-    sec = "2. BUNDLEFIX-001 (phantom binding keys)"
-    hits = 0
-    for name, spec in apps.items():
-        b = (spec or {}).get("bindings")
-        if not isinstance(b, dict):
-            continue
-        bad = sorted(set(b.keys()) & PHANTOM_BINDING_KEYS)
-        if bad:
-            R.add(sec, "FAIL", "PHANTOM",
-                  "%s has phantom per-endpoint binding key(s): %s" % (name, ", ".join(bad)))
-            hits += 1
-    if not hits:
-        R.add(sec, "PASS", "PHANTOM",
-              "no app reintroduces a removed phantom binding key (%s)"
-              % ", ".join(sorted(PHANTOM_BINDING_KEYS)))
-
-
-def check_vault(R, apps, rels):
-    sec = "3. BUNDLEFIX-002 (vault de-HA)"
-    v = apps.get("vault")
-    if v is None:
-        R.add(sec, "WARN", "VAULT", "no 'vault' app found")
-        return
-    opts = (v or {}).get("options") or {}
-    if "vip" in opts:
-        R.add(sec, "FAIL", "VAULT-VIP", "vault has a 'vip' option (must be de-HA'd): %r" % opts["vip"])
-    else:
-        R.add(sec, "PASS", "VAULT-VIP", "vault has no vip")
-    if "os-public-hostname" in opts:
-        R.add(sec, "WARN", "VAULT-HOST", "vault has os-public-hostname (expected removed)")
-    if "vault-hacluster" in apps:
-        R.add(sec, "FAIL", "VAULT-HA", "vault-hacluster application is present (must be removed)")
-    else:
-        R.add(sec, "PASS", "VAULT-HA", "no vault-hacluster application")
-    for r in rels:
-        if isinstance(r, list) and any(isinstance(e, str) and e.startswith("vault:ha") for e in r):
-            R.add(sec, "FAIL", "VAULT-HAREL", "vault:ha relation present: %r" % (r,))
-
-
-def map_hacluster(apps, rels):
-    """principal -> hacluster_app_name, using charm==hacluster + the :ha relation."""
-    hac_apps = {n for n, s in apps.items() if (s or {}).get("charm") == "hacluster"}
-    principal_of = {}
-    for r in rels:
-        if not (isinstance(r, list) and len(r) == 2):
-            continue
-        a0, a1 = ep_app(r[0]), ep_app(r[1])
-        if a0 in hac_apps and a1 and a1 not in hac_apps:
-            principal_of[a1] = a0
-        elif a1 in hac_apps and a0 and a0 not in hac_apps:
-            principal_of[a0] = a1
-    return hac_apps, principal_of
-
-
-def check_hacluster(R, apps, rels):
-    sec = "4. BUNDLEFIX-003 (hacluster cluster_count)"
-    hac_apps, principal_of = map_hacluster(apps, rels)
-    if not hac_apps:
-        R.add(sec, "WARN", "HAC", "no hacluster apps found")
-        return principal_of
-    principal_for_hac = {h: p for p, h in principal_of.items()}
-    ok = 0
-    for h in sorted(hac_apps):
-        opts = (apps[h].get("options") or {})
-        cc = opts.get("cluster_count")
-        prin = principal_for_hac.get(h)
-        nu = (apps.get(prin, {}) or {}).get("num_units") if prin else None
-        if cc is None:
-            R.add(sec, "FAIL", "HAC-CC", "%s missing cluster_count" % h)
-            continue
-        if not prin:
-            R.add(sec, "WARN", "HAC-PRIN", "%s has no principal via :ha relation" % h)
-        if isinstance(nu, int) and cc > nu:
-            R.add(sec, "FAIL", "HAC-OVER",
-                  "%s cluster_count=%s > principal %s num_units=%s" % (h, cc, prin, nu))
-            continue
-        if cc != 1:
-            R.add(sec, "WARN", "HAC-NE1",
-                  "%s cluster_count=%s (testcloud baseline is 1)" % (h, cc))
-        else:
-            ok += 1
-    if ok:
-        R.add(sec, "PASS", "HAC", "%d hacluster app(s) cluster_count=1 and <= principal num_units" % ok)
-
-
-def check_memcached(R, apps, rels):
-    sec = "5. BUNDLEFIX-004 (memcached)"
-    if "memcached" not in apps:
-        R.add(sec, "FAIL", "MEMCACHE-APP", "no 'memcached' application")
-    else:
-        R.add(sec, "PASS", "MEMCACHE-APP", "memcached application present")
-    found = False
-    for r in rels:
-        if not (isinstance(r, list) and len(r) == 2):
-            continue
-        s = set()
-        for e in r:
-            if isinstance(e, str):
-                s.add(e)
-        if {"nova-cloud-controller:memcache", "memcached:cache"} <= s:
-            found = True
-    R.add(sec, "PASS" if found else "FAIL", "MEMCACHE-REL",
-          "nova-cloud-controller:memcache <-> memcached:cache relation %s"
-          % ("present" if found else "MISSING"))
-
-
-def check_router_bindings(R, apps):
-    sec = "6. BUNDLEFIX-005 (mysql-router metal binding)"
-    routers = [n for n, s in apps.items() if (s or {}).get("charm") == "mysql-router"]
-    if not routers:
-        R.add(sec, "WARN", "ROUTER", "no mysql-router apps found")
-        return
-    bad = 0
-    for n in sorted(routers):
-        b = (apps[n].get("bindings") or {})
-        # effective default space is the "" key; anchors already resolved by yaml
-        default = b.get("", None)
-        non_metal = {k: v for k, v in b.items() if v not in ("metal",)}
-        if default == "metal" and not non_metal:
-            continue
-        if default != "metal":
-            R.add(sec, "FAIL", "ROUTER-BIND",
-                  "%s default space binding is %r (expected metal)" % (n, default))
-            bad += 1
-        elif non_metal:
-            R.add(sec, "WARN", "ROUTER-BIND",
-                  "%s has non-metal endpoint binding(s): %r" % (n, non_metal))
-    if not bad:
-        R.add(sec, "PASS", "ROUTER-BIND",
-              "%d mysql-router app(s) bound to metal" % len(routers))
-
-
-def check_vips(R, apps, rels):
-    sec = "7. BUNDLEFIX-006 / D-020 (dual provider+metal VIPs)"
-    _, principal_of = map_hacluster(apps, rels)
-    clustered = sorted(principal_of.keys())
-    # set comparison vs expected D-020 clustered set
-    got = set(clustered)
-    exp = set(EXPECTED_CLUSTERED)
-    if got != exp:
-        if exp - got:
-            R.add(sec, "WARN", "VIP-SET", "expected-clustered apps NOT detected as clustered: %s"
-                  % ", ".join(sorted(exp - got)))
-        if got - exp:
-            R.add(sec, "WARN", "VIP-SET", "clustered apps beyond the D-020 set: %s"
-                  % ", ".join(sorted(got - exp)))
-    ok = 0
-    for name in clustered:
-        opts = (apps[name].get("options") or {})
-        vip = opts.get("vip")
-        if not vip:
-            R.add(sec, "FAIL", "VIP-MISS", "%s is clustered but has no vip" % name)
-            continue
-        parts = str(vip).split()
-        if len(parts) != 2:
-            R.add(sec, "FAIL", "VIP-DUAL", "%s vip is not dual (got %r)" % (name, vip))
-            continue
-        prov, metal = parts
-        if not in_net(prov, PROVIDER_NET):
-            R.add(sec, "FAIL", "VIP-PROV", "%s provider vip %s not in %s" % (name, prov, PROVIDER_NET))
-            continue
-        if not in_net(metal, METAL_NET):
-            R.add(sec, "FAIL", "VIP-METAL", "%s metal vip %s not in %s" % (name, metal, METAL_NET))
-            continue
-        po, mo = int(prov.split(".")[-1]), int(metal.split(".")[-1])
-        if po != mo:
-            R.add(sec, "FAIL", "VIP-MIRROR", "%s octets differ: provider .%d vs metal .%d" % (name, po, mo))
-            continue
-        if not (VIP_OCTET_MIN <= mo <= VIP_OCTET_MAX):
-            R.add(sec, "FAIL", "VIP-RANGE",
-                  "%s metal vip octet .%d outside reserved %d-%d" % (name, mo, VIP_OCTET_MIN, VIP_OCTET_MAX))
-            continue
-        expected_octet = EXPECTED_CLUSTERED.get(name)
-        if expected_octet is not None and po != expected_octet:
-            R.add(sec, "WARN", "VIP-OCTET",
-                  "%s vip octet .%d != D-020 map .%d" % (name, po, expected_octet))
-        ok += 1
-    if ok:
-        R.add(sec, "PASS", "VIP-DUAL",
-              "%d clustered API charm(s) have mirrored dual VIPs in the reserved range" % ok)
-
-
-def check_osd(R, apps):
-    sec = "8. Anti-pattern: ceph-osd osd-devices"
-    osds = [n for n, s in apps.items() if (s or {}).get("charm") == "ceph-osd"]
-    if not osds:
-        R.add(sec, "WARN", "OSD", "no ceph-osd app found")
-        return
-    for n in osds:
-        dev = (apps[n].get("options") or {}).get("osd-devices")
-        if not dev or not isinstance(dev, str) or not dev.strip().startswith("/"):
-            R.add(sec, "FAIL", "OSD-DEV", "%s osd-devices not a real path: %r" % (n, dev))
-        else:
-            note = ""
-            if "/dev/disk/by-" not in dev:
-                note = " (kernel-name; by-path/by-id is harder for bare metal -- Roosevelt note)"
-            R.add(sec, "PASS", "OSD-DEV", "%s osd-devices=%s%s" % (n, dev.strip(), note))
-
-
-def check_ovn(R, apps):
-    sec = "9. Anti-pattern: ovn-chassis mappings (MAC over NIC name)"
-    chassis = [n for n, s in apps.items() if (s or {}).get("charm") == "ovn-chassis"]
-    if not chassis:
-        R.add(sec, "WARN", "OVN", "no ovn-chassis app found")
-        return
-    for n in sorted(chassis):
-        opts = (apps[n].get("options") or {})
-        bim = opts.get("bridge-interface-mappings")
-        if not bim:
-            R.add(sec, "INFO", "OVN-BIM", "%s has no bridge-interface-mappings (expected for octavia-side chassis)" % n)
-            continue
-        if MAC_RE.search(str(bim)):
-            R.add(sec, "PASS", "OVN-BIM", "%s bridge-interface-mappings is MAC-based" % n)
-        else:
-            R.add(sec, "WARN", "OVN-BIM",
-                  "%s bridge-interface-mappings has no MAC (NIC-name? fragile): %r" % (n, bim))
-
-
-def check_os_networks(R, apps, rels):
-    sec = "10. D-020: spaces-native (no os-*-network pinning)"
-    _, principal_of = map_hacluster(apps, rels)
-    flagged = 0
-    for name in sorted(principal_of):
-        opts = (apps[name].get("options") or {})
-        for k in ("os-internal-network", "os-admin-network", "os-public-network"):
-            if k in opts:
-                R.add(sec, "WARN", "OS-NET",
-                      "%s sets %s (D-020 found spaces-native resolve sufficient; verify intent)" % (name, k))
-                flagged += 1
-    if not flagged:
-        R.add(sec, "PASS", "OS-NET", "no clustered charm pins os-*-network (spaces-native, per D-020)")
-
-
-def expected_channel(charm):
-    if charm in CHANNEL_MATRIX:
-        return CHANNEL_MATRIX[charm]
-    if charm in OPENSTACK_CORE_CHARMS:
-        return OPENSTACK_CORE_CHANNEL
-    return None
-
-
-def check_channels_base(R, apps):
-    sec = "11. Channels / base (verified Caracal matrix; WARN-only)"
-    mismatch = 0
-    for name, spec in sorted(apps.items()):
-        spec = spec or {}
-        charm = spec.get("charm")
-        ch = spec.get("channel")
-        exp = expected_channel(charm)
-        if exp and ch and ch != exp:
-            R.add(sec, "WARN", "CHANNEL", "%s (%s) channel=%s expected=%s" % (name, charm, ch, exp))
-            mismatch += 1
-        base = spec.get("base")
-        series = spec.get("series")
-        if base and base != EXPECTED_BASE:
-            R.add(sec, "WARN", "BASE", "%s base=%s expected=%s" % (name, base, EXPECTED_BASE))
-        if series and series not in ("jammy",):
-            R.add(sec, "WARN", "SERIES", "%s series=%s expected=jammy" % (name, series))
-    if not mismatch:
-        R.add(sec, "PASS", "CHANNEL", "no charm deviates from the known Caracal channel matrix")
-
-
-def summary_tables(R, apps, rels):
-    sec = "12. Inventory (informational)"
-    _, principal_of = map_hacluster(apps, rels)
-    for name in sorted(principal_of):
-        vip = ((apps[name].get("options") or {}).get("vip"))
-        R.add(sec, "INFO", "CLUSTERED", "%-26s vip=%s" % (name, vip))
-    routers = sorted(n for n, s in apps.items() if (s or {}).get("charm") == "mysql-router")
-    R.add(sec, "INFO", "ROUTERS", "%d mysql-router apps: %s" % (len(routers), ", ".join(routers)))
-
-
-# --------------------------------------------------------------------------- #
-# Main
-# --------------------------------------------------------------------------- #
-def main():
-    ap = argparse.ArgumentParser(description="Comprehensive Caracal bundle reviewer (read-only).")
-    ap.add_argument("bundle", nargs="?", default="bundle.yaml")
-    ap.add_argument("--strict", action="store_true", help="treat WARN as failing for exit code")
-    ap.add_argument("--quiet", action="store_true", help="show only WARN/FAIL")
-    args = ap.parse_args()
-
-    try:
-        with open(args.bundle, "r", encoding="utf-8", errors="replace") as fh:
-            text = fh.read()
-    except FileNotFoundError:
-        sys.stderr.write("ERROR: bundle not found: %s\n" % args.bundle)
-        return 2
-
-    try:
-        doc = yaml.load(text, Loader=DupKeyLoader)
-    except yaml.YAMLError as e:
-        sys.stderr.write("ERROR: YAML parse failed: %s\n" % e)
-        return 2
-
-    R = Reporter(quiet=args.quiet)
-    print("================ Caracal v1 bundle review: %s ================" % args.bundle)
-
-    check_ascii(R, text)
-    apps, rels = check_structure(R, doc)
-    if apps is None:
-        R.emit()
-        return 1
-    check_relations(R, apps, rels)
-    check_bindings_phantom(R, apps)
-    check_vault(R, apps, rels)
-    check_hacluster(R, apps, rels)
-    check_memcached(R, apps, rels)
-    check_router_bindings(R, apps)
-    check_vips(R, apps, rels)
-    check_osd(R, apps)
-    check_ovn(R, apps)
-    check_os_networks(R, apps, rels)
-    check_channels_base(R, apps)
-    summary_tables(R, apps, rels)
-
-    R.emit()
-    fail = R.counts["FAIL"] > 0
-    warn = R.counts["WARN"] > 0
-    if fail or (args.strict and warn):
-        print("\nVERDICT: NOT CLEAN" + (" (--strict: WARN counts)" if (warn and not fail) else ""))
-        return 1
-    print("\nVERDICT: CLEAN" + (" (with WARN review items)" if warn else ""))
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/runbooks/01-destroy-model.md b/runbooks/01-destroy-model.md
deleted file mode 100644
index d158b84..0000000
--- a/runbooks/01-destroy-model.md
+++ /dev/null
@@ -1,99 +0,0 @@
-# Runbook 01 — Teardown of existing testcloud
-
-**Reference:** D-018 (skip graceful, MAAS-release-direct). Supersedes the
-graceful-teardown approach formerly in D-013.
-
-**Pre-conditions:**
-
-- KVM snapshots of openstack0–3 exist as the safety net (pre-Magnum
-  baseline). With L3 full rebuild (D-017) we should not need them, but they
-  remain valid disaster recovery.
-- Run from jumphost `vopenstack-jesse` as user `jessea123`.
-- Authenticated Juju session active (`juju whoami` returns identity).
-- MAAS CLI profile configured OR access to MAAS UI for releasing machines.
-- This procedure destroys the entire `openstack` Juju model and wipes all 5
-  MAAS-managed VMs. There is no undo short of restoring from snapshot.
-
-**Phase A — Pre-destroy capture (~30 sec)**
-
-```bash
-BACKUP_DIR=~/backups/pre-caracal-destroy-$(date -u +%Y%m%dT%H%M%SZ)
-mkdir -p "$BACKUP_DIR"
-juju export-bundle > "$BACKUP_DIR/bundle-pre-destroy.yaml"
-juju status --format=yaml > "$BACKUP_DIR/juju-status-pre-destroy.yaml"
-juju models --format=yaml > "$BACKUP_DIR/juju-models-pre-destroy.yaml"
-ls -la "$BACKUP_DIR"
-```
-
-This is reference material for diff-checking against the new Caracal bundle
-later. Not used for restore.
-
-**Phase B — Force-destroy the Juju model (~1-2 min to return; ~5-10 min to fully reap in background)**
-
-```bash
-juju destroy-model openstack --force --no-wait --destroy-storage --no-prompt
-```
-
-Flags:
-
-- `--force` — ignore charm hooks; don't wait for graceful shutdown
-- `--no-wait` — return immediately; reaping continues in the background
-- `--destroy-storage` — mark Juju-tracked persistent storage for cleanup
-- `--no-prompt` — non-interactive
-
-The Juju controller on `juju.maas` is untouched. Only the `openstack` model
-is destroyed.
-
-**Phase C — Release MAAS machines (parallel with Phase B; ~5 min)**
-
-Either path is acceptable. UI is faster for visual confirmation; CLI is
-script-documented for Roosevelt.
-
-**Path 1 — MAAS UI:** Machines → select `openstack0`, `openstack1`,
-`openstack2`, `openstack3`, `capi-mgmt` → Take action → Release.
-
-**Path 2 — MAAS CLI:**
-
-```bash
-# Replace $PROFILE with your MAAS CLI profile name (e.g. "admin")
-PROFILE=admin
-
-# Look up system IDs
-maas $PROFILE machines read 2>/dev/null \
-  | jq -r '.[] | select(.hostname | test("^(openstack[0-3]|capi-mgmt)$")) | "\(.hostname) \(.system_id) \(.status_name)"'
-
-# Release each by system_id
-for SID in <id1> <id2> <id3> <id4> <id5>; do
-  maas $PROFILE machine release "$SID" comment="Caracal rebuild teardown"
-done
-```
-
-LXD VMs managed by MAAS are destroyed on release; the VMs go away and the
-machine entries return to Ready state.
-
-**Phase D — Verification (~1 min)**
-
-```bash
-# Juju side
-juju models
-# Expect: openstack model not listed
-
-# MAAS side — all 5 hostnames must report Ready
-maas $PROFILE machines read 2>/dev/null \
-  | jq -r '.[] | select(.hostname | test("^(openstack[0-3]|capi-mgmt)$")) | "\(.hostname) \(.status_name)"'
-# Expect five lines, each ending in "Ready"
-```
-
-**If the Juju model is still listed as "destroying" after 10 minutes:**
-
-```bash
-# Force-clean any orphan machine entries
-juju machines -m openstack --format=yaml 2>/dev/null
-# For each lingering machine:
-juju remove-machine -m openstack --force <id>
-# Then attempt model removal again
-juju destroy-model openstack --force --no-wait --no-prompt
-```
-
-**Exit criteria:** `juju models` does not show `openstack`. All 5 VMs show
-`Ready` in MAAS. Proceed to `02-deploy.md`.
diff --git a/runbooks/README.md b/runbooks/README.md
new file mode 100644
index 0000000..2195bde
--- /dev/null
+++ b/runbooks/README.md
@@ -0,0 +1,47 @@
+# v1 Deploy Runbook -- VR0 DC0 Omega Cloud (Caracal 2024.1, IPv4)
+
+The deploy is a gated sequence: run `phase-00` through `phase-08` in order. Each phase
+ends in a hard gate (an explicit pass/fail check); do not start the next phase until the
+current gate passes. The two appendices are reference, not steps.
+
+## Conventions
+
+- **RUN location.** Every command block is tagged with where it runs: `# RUN: jumphost`
+  (the `vopenstack-jesse` jumphost, with `juju` + the openstack CLI), `# RUN: mgmt VM`
+  (the in-cloud CAPI management VM, reached over SSH), or a charm unit via
+  `juju ssh <unit> -- '...' </dev/null`.
+- **Gates.** A line beginning `GATE:` or `EXIT GATE` is a stop-and-verify point.
+  Read-only verification precedes every mutation; destructive and secret-handling steps
+  are individually gated, never batched.
+- **Dynamic lookups.** VIPs, project names, IDs, and version constellations are
+  discovered at run time, not hardcoded. Site-specific literals are tagged `ENV(...)`
+  for the eventual generalization pass toward Roosevelt.
+- **ASCII only.** All runbook content is ASCII (a mod_wsgi UnicodeDecodeError lesson);
+  keep it that way when editing.
+
+## Phases
+
+| #  | File                                    | Purpose                                              | Decisions             |
+| -- | --------------------------------------- | ---------------------------------------------------- | --------------------- |
+| 00 | phase-00-teardown-maas-reset.md         | Destroy the model + reset MAAS to a clean rebuild    | KI-P3-001             |
+| 01 | phase-01-bundle-deploy.md               | Octavia PKI overlay + `juju deploy` + settle wait    |                       |
+| 02 | phase-02-vault-bringup.md               | Vault init/unseal + cert cascade (PKI root)          | manual unseal = v1 std |
+| 03 | phase-03-core-verify.md                 | Settle, regenerate admin-openrc, verify Horizon      |                       |
+| 04 | phase-04-network-carve.md               | Provider external network + IPAM reference           |                       |
+| 05 | phase-05-octavia-enablement.md          | Enable Octavia (amphora)                             | D-021                 |
+| 06 | phase-06-incloud-mgmt-cluster.md        | In-cloud single-homed CAPI management cluster        | D-035                 |
+| 07 | phase-07-conductor-graft.md             | Graft the magnum-capi-helm driver onto the conductor | D-031 / D-037 / D-042 |
+| 08 | phase-08-workload-cluster-acceptance.md | End-to-end tenant cluster + acceptance bar           | D-011 (amended D-019) |
+
+## Appendices
+
+- **appendix-A-troubleshooting.md** -- symptom -> cause -> fix index, keyed by
+  D-NNN / DOCFIX-NNN / lesson.
+- **appendix-B-asbuilt-version-lock.md** -- charm channels, the CAPI version
+  constellation, and the magnum-capi-helm driver pin.
+
+## History
+
+This `phase-NN` set supersedes the earlier `v1-do-doc-NN-*` execution documents (and the
+older `NN-*.md` set and the `deprecated/` folder), which were removed in the repo
+sanitation sweep. Git history preserves them.
diff --git a/runbooks/appendix-A-troubleshooting.md b/runbooks/appendix-A-troubleshooting.md
new file mode 100644
index 0000000..3b3a9ee
--- /dev/null
+++ b/runbooks/appendix-A-troubleshooting.md
@@ -0,0 +1,366 @@
+# Appendix A -- Troubleshooting / Known-Issues Index
+
+Keyed by the same `D-NNN` / `DOCFIX-NNN` / `L-P6-N` identifiers used inline in the
+phase runbooks. This is an OPERATIONAL index (symptom -> cause -> fix), NOT the
+decision log: full rationale lives in `design-decisions.md` and the per-decision
+files (`D-0NN-*.md`); the driver fix has its own `magnum-capi-helm-driver-fix-runbook`.
+Each entry notes the phase(s) that reference it. ASCII-only.
+
+================================================================================
+## Remote execution / scripting
+================================================================================
+
+### DOCFIX-021 -- heredoc / stdin consumption  (phase-06, phase-07)
+- Symptom: a multi-line `juju ssh`/`ssh ... bash -s` or remote `sudo` block dies
+  early or behaves as if truncated; later commands in the heredoc never run.
+- Cause: an inner `ssh`/`sudo`/`juju ssh` (or any stdin reader) consumes the rest
+  of the heredoc/pipe that was feeding the outer command.
+- Fix: append `</dev/null` to every inner `ssh`/`sudo`/`juju ssh` invocation
+  (use `</dev/tty` instead only when the call genuinely needs an interactive prompt).
+- Also: wrap multi-statement pasteable jumphost blocks in `( { ...; } )` so a stray
+  `exit` cannot kill the interactive shell.
+- SECOND MANIFESTATION (phase-03): a charm ACTION's human output silently corrupts a
+  captured artifact. `juju run vault/leader get-root-ca` wraps the PEM in an INDENTED
+  YAML `output: |-` block; `sed`-by-marker preserves the indent and an indented
+  `-----BEGIN CERTIFICATE-----` is not valid PEM -> openssl "Unable to load
+  certificate" -> keystone NO_CERTIFICATE_OR_CRL_FOUND. Fix: pull from the action JSON
+  (real newlines, no indent): `juju run vault/leader get-root-ca -m openstack
+  --format json | jq -r '[.. | strings | select(test("BEGIN CERTIFICATE"))][0]'`.
+  (Same class as DOCFIX-006: never trust action human output for a captured secret/cert.)
+
+### L-P6-4 -- admin-kubeconfig / secret transfer  (phase-07)
+- Risk: staging the cluster-admin kubeconfig (or any secret) in `/tmp`, or letting a
+  PTY mangle it in transit.
+- Fix: pipe base64 straight into a root-written file with `umask 077`, then `chown`
+  to the service user and `chmod 0600` -- never touch `/tmp`. (Pattern in phase-07 7.2.)
+- Hardening (Roosevelt): replace the cluster-admin kubeconfig with a scoped
+  ServiceAccount kubeconfig carrying only the RBAC the driver needs.
+
+================================================================================
+## k8s-snap bootstrap (mgmt cluster)
+================================================================================
+
+### DOCFIX-024 -- bootstrap config missing the cluster-config block  (phase-06)
+- Symptom: `k8s bootstrap` "succeeds" but the node never reaches Ready; network and
+  DNS are silently disabled; CoreDNS/Cilium absent.
+- Cause: a bootstrap `--file` whose top level lacks a `cluster-config:` block leaves
+  ALL features (network, dns, ...) at disabled defaults. Setting only `pod-cidr` /
+  `service-cidr` / `extra-sans` does NOT enable them.
+- Fix: include an explicit block:
+      cluster-config:
+        network: { enabled: true }
+        dns:     { enabled: true }
+  (See phase-06 6.4 for the full config.) Retry: `snap remove k8s --purge` then re-bootstrap.
+
+================================================================================
+## CAPI provider install (mgmt cluster)
+================================================================================
+
+### DOCFIX-025a -- cert-manager Helm flag  (phase-06)
+- Symptom: cert-manager install fails / CRDs absent when using `--set installCRDs=true`.
+- Cause: `installCRDs` was removed from the cert-manager chart (~v1.18). The current
+  flag is `crds.enabled=true`.
+- Fix: `helm install cert-manager jetstack/cert-manager ... --set crds.enabled=true`.
+
+### D-034 -- CAPI install ordering (ORC before clusterctl init)  (phase-06)
+- Symptom: after `clusterctl init`, `capo-controller-manager` CrashLoopBackOff
+  (observed ~6 restarts / ~15 min) before self-healing.
+- Cause: CAPO v0.14.4's `openstackserver` controller hard-depends on ORC's
+  `Image.openstack.k-orc.cloud` CRD at startup. `clusterctl init` installs CAPO; if
+  ORC is not yet present, CAPO crash-loops until it appears.
+- Fix: install ORC (its manifest provides the `Image` CRD) BEFORE `clusterctl init`.
+  Hardened order: cert-manager -> ORC -> clusterctl init -> CAAPH -> janitor.
+- Related rule: source every provider version from the chosen `capi-helm-charts`
+  tag's `dependencies.json` (read live with `jq`); do not hardcode semver.
+  (Full rationale: design-decisions D-034; driver-coherence amendment: D-042.)
+
+================================================================================
+## Networking / pod egress
+================================================================================
+
+### D-035 -- dual-homed mgmt node pod-egress reverse-path failure  (phase-06)
+- Symptom (the prior D-033 architecture): a pod's egress TCP connect to an external
+  VIP hangs; the agnhost probe never reaches Completed. SYN leaves the correct NIC and
+  the SYN-ACK arrives, but the reply is emitted back out the NIC instead of being
+  redirected into the pod via `cilium_host` -- silent, asymmetric breakage. (The
+  "do-07 pattern.")
+- Cause: Cilium reverse-path handling on a node with multiple NICs.
+- Fix (chosen): D-035 single-homed in-cloud tenant VM avoids it entirely; phase-06
+  GATE 2 (agnhost pod -> Keystone VIP, must Complete) is the explicit proof. (The
+  transferable alternative -- Cilium device pinning -- is a Roosevelt note, not v1.)
+
+================================================================================
+## Magnum conductor
+================================================================================
+
+### D-037 -- conductor config-dir injection (NOT a systemd ExecStart drop-in)  (phase-07)
+- Symptom: the `[capi_helm]` conf.d drop-in is ignored; the conductor behaves as if it
+  was never written, even though a systemd drop-in "looks" applied.
+- Cause: these OpenStack debs (openstack-pkg-tools) run the daemon through an LSB init
+  script wrapped by systemd `systemd-start`, NOT a direct `ExecStart=`. A systemd
+  drop-in appending `--config-dir` passes it as a positional arg to the init script,
+  which ignores it -- the flag never reaches the daemon. The args are assembled inside
+  the init script from `DAEMON_ARGS` (base `--config-file` first), extensible only via
+  `/etc/default/<service>`.
+- Fix: create `/etc/default/magnum-conductor` (0644; the charm does not manage it):
+      DAEMON_ARGS="$DAEMON_ARGS --config-dir /etc/magnum/magnum.conf.d"
+  Verify with the init script's own `show-args` (dry-run) AND `ps -ww -C
+  magnum-conductor -o args` on the live process -- behavioral, not string-presence.
+- Residual: if a future charm hook ever writes `/etc/default/magnum-conductor`, the
+  append is lost and `[capi_helm]` silently stops being read. Re-check via show-args/ps.
+
+### L-P6-1 / L-P6-2 -- verify the launched cmdline, not the unit text  (phase-07)
+- Rule: never assume the systemd `ExecStart` shape for OpenStack debs, and never treat
+  "string present in the unit file" as "the daemon received the flag." Gate on the
+  assembled/launched cmdline (`show-args`, then `ps` on the live process).
+
+### L-P6-3 -- k8s version comes from the IMAGE, not a template label  (phase-08)
+- Symptom: cluster create fails in the driver before provisioning.
+- Cause: the magnum-capi-helm driver reads `kube_version` from the Glance image
+  properties and routes on `os_distro`; it does NOT take k8s version from a template
+  label.
+- Fix: the workload image (e.g. `ubuntu-jammy-kube-v1.32.13`) MUST carry
+  `kube_version` (e.g. v1.32.13) and `os_distro=ubuntu`. Verify before create (phase-08 8.0).
+
+================================================================================
+## Driver / cluster health
+================================================================================
+
+### D-042 -- driver contract-coherence; health "infrastructure: not found"  (phase-07, phase-08, appendix-B)
+- Symptom: `coe cluster show` reports `health_status = UNHEALTHY` deterministically
+  (survives a conductor restart); only the `infrastructure` sub-check fails
+  ("Infrastructure resource not found"); cluster + control-plane + nodegroup are Ready.
+- Cause: driver 1.3.0 reads `apiVersion` off `spec.infrastructureRef` to build its
+  health GET, but the CAPI v1.13 (v1beta2 contract) ref carries apiGroup+kind+name with
+  NO apiVersion. COSMETIC -- the create path is unaffected (the chart templates the
+  resource versions); only the driver's direct health query breaks.
+- Fix: upgrade to the RELEASED `magnum-capi-helm==1.4.0` (the "generalize-api-resources"
+  feature). 1.4.0 builds each health GET from an explicit api_version via its
+  `[capi_helm] api_resources` option, which DEFAULTS to v1beta1 for every CAPI kind --
+  and CAPI v1.13.2 / CAPO v0.14.4 still serve v1beta1, so the default works (no override
+  needed; phase-07 7.3-7.6). Set a per-kind override only if a kind is v1beta2-only.
+  Rule (amends D-034): the Layer-B driver pin must be contract-coherent with the
+  Layer-A CAPI core.
+- Operational caveat while unfixed: do NOT wire magnum auto-healing to `health_status`
+  (a persistent false UNHEALTHY could misfire); CAPI MachineHealthCheck heals independently.
+
+================================================================================
+## Cluster lifecycle / Octavia
+================================================================================
+
+### D-039 -- app-cred roles (load-balancer_member) / Octavia 403  (phase-08)
+- Symptom: cluster create or delete wedges; CAPO gets 403 querying the Octavia LB.
+- Cause: the Magnum-minted application credential lacks `load-balancer_member`
+  (a pre-D-039 frozen app-cred cannot query Octavia to confirm LB state).
+- Fix: ensure the service path mints app-creds carrying `load-balancer_member`
+  (+ member, reader). Verify before acceptance (phase-08 prereqs).
+
+### stuck-delete -- wedged CAPI cluster delete  (phase-08)
+- Symptom: cluster stuck `DELETE_IN_PROGRESS`; helm release already gone; `Cluster`
+  and `OpenStackCluster` CRs stuck Deleting (often on an Octavia 403, see D-039).
+- Recovery: clear the `OpenStackCluster` finalizer on the mgmt cluster --
+  `kubectl -n <magnum-ns> patch openstackcluster <cluster>-<suffix> --type=merge
+  -p '{"metadata":{"finalizers":[]}}'`. The `Cluster` finalizer was only waiting on it,
+  so the Cluster auto-finalizes and deletes. Then manually clean orphaned neutron
+  resources in dependency order: router remove subnet -> router unset external-gateway
+  -> router delete -> subnet delete -> network delete -> security group delete.
+
+### LB-failover -- LB stuck provisioning_status=ERROR after a host event  (phase-08)
+- Symptom: the kube-api Octavia LB shows `operating_status ONLINE` but
+  `provisioning_status ERROR` after a host outage/OOM.
+- Cause: a control-plane op on the amphora failed during the outage.
+- Fix: `openstack loadbalancer failover <lb-id>` in ADMIN-project scope (amphora /
+  failover ops 403 under tenant member scope). Watch ERROR -> PENDING_UPDATE -> ACTIVE
+  (~100s); a single STANDALONE amphora gives a brief blip; operating_status holds ONLINE.
+
+### uninitialized-taint -- workload addons Pending  (phase-08)
+- Symptom: new workload nodes are kubelet-Ready but addon pods (metrics-server,
+  node-feature-discovery, etc.) stay Pending; nodes carry
+  `node.cluster.x-k8s.io/uninitialized`.
+- Cause: that taint is removed by the CAPI machine controller on the MANAGEMENT
+  cluster. If the mgmt cluster is down (see D-041), the taint persists.
+- Fix: restore the mgmt cluster API; CAPI then removes the taint and addons schedule.
+
+### CNI-label -- network_driver vs the chart-default Calico (1.4.0)  (phase-08)
+- Note: under the as-FIRST-built driver 1.3.0 the legacy Magnum `network_driver` label
+  was IGNORED and the capi-helm `openstack-cluster` chart's default CNI (Calico) always
+  ran. Under the RELEASED 1.4.0 driver the `network_driver` template option IS honored
+  (it maps through to the chart). To keep the as-built CNI (Calico), the `capi-k8s-v1-32`
+  template OMITS `--network-driver` (phase-08); set `flannel` there only to intentionally
+  switch the CNI. (Mgmt cluster CNI is separately Cilium, via k8s-snap.)
+
+================================================================================
+## Hyperconverged host / mgmt-VM resilience
+================================================================================
+
+### D-040 -- host OOM from low reserved-host-memory  (phase-08)
+- Symptom: guests OOM-killed; a compute host may even present in Juju as
+  `State=down` (heavy swap thrash stalls OVS/OVN heartbeats and the machine agent).
+- Cause: `reserved-host-memory` default 512 MB does not cover the co-located
+  LXD/Ceph/MySQL services on these hyperconverged hosts -> nova over-commits real RAM.
+- Fix: `reserved-host-memory = 8192` on all compute units (baked into the hardened
+  bundle). Diagnose a suspected OOM-vs-reboot with `who -b` / `uptime` (no recent boot)
+  and `journalctl -k | grep -i oom`; the ovsdb "no response to inactivity probe ...
+  disconnecting" storm is the swap-thrash signature.
+
+### D-041 -- single-node mgmt cluster does not self-heal  (phase-08)
+- Symptom: after a host event the mgmt VM (`capi-mgmt-v2`) is SHUTOFF; FIP
+  unreachable; magnum cannot reach the mgmt API; workload addons go Pending (see
+  uninitialized-taint).
+- Cause: the D-035 single-node mgmt cluster is a SPOF with no MachineHealthCheck
+  (unlike the workload cluster).
+- Fix: `openstack server start capi-mgmt-v2` (API serves ~40s later; a brief TLS
+  handshake timeout on the first kubectl is expected). Follow-up: HA mgmt cluster for
+  Roosevelt.
+
+### juju-macaroon -- "cannot get discharge ... EOF"  (phase-07, phase-08)
+- Symptom: `juju ssh` (or any other juju call) fails mid-session with a discharge/EOF error.
+- Cause: the juju macaroon expired during a long session.
+- Fix: re-run `juju login`, then retry.
+
+================================================================================
+## Teardown / MAAS reset (phase-00)
+================================================================================
+
+### DOCFIX-016 -- never `maas list` (API-key leak)  (phase-00, phase-01, phase-04)
+- Risk: `maas list` prints the stored API key to stdout (and into any transcript/log).
+- Fix: the profile name is known (`admin`); call `maas admin ...` directly. Never run
+  `maas list` in a runbook or paste block.
+
+### DOCFIX-017 -- no `maas whoami`; hardcode the eyeballed system_ids  (phase-00)
+- Risk: scripting machine selection via `maas <profile> whoami` + owner filters is
+  fragile and, in this lab, unnecessary.
+- Fix: the four host system_ids are fixed and eyeball-verified
+  (openstack0=4na83t, openstack1=qdbqd6, openstack2=h8frng, openstack3=tmsafc) --
+  iterate those literals. (The older 01-destroy-model.md used `maas list`/`whoami` and
+  released 5 VMs incl. the retired D-033 capi-mgmt; the current rebuild releases 4.)
+
+### R7 -- sudo for libvirt / qemu-img  (phase-00, phase-01)
+- The OSD qcow2 files (`/var/lib/libvirt/images/<host>-1.qcow2`) are root:root / 600;
+  `qemu-img info|create`, `virsh domstate`, `stat`, `rm` against them all need `sudo`.
+
+### KI-P3-001 -- VIP / primary collision  (phase-00, phase-04)
+- Symptom: a charm `vip:` address equals a MAAS-auto-assigned machine/container
+  primary (observed: cinder public VIP .226 == magnum container 1/lxd/3 primary).
+- Cause: MAAS auto-static allocation was not excluded over the VIP block (provider had
+  NO VIP reservation), so MAAS handed primaries .225/.226/.227 onto the .224-.236 VIPs.
+- Fix (durable): on EVERY space carrying VIPs (provider AND metal) reserve the
+  front-loaded VIP /26 in MAAS, distinct from the primary range and any neutron
+  allocation_pool (phase-00 Phase 4). A reserved range stops future auto-assign onto
+  a configured VIP. Negative test post-deploy: no service vip == any unit primary.
+
+================================================================================
+## Deploy-time (phase-01)
+================================================================================
+
+### R14 -- VIP relocation .224-.236 -> .50-.60  (phase-01)
+- The public + internal API VIPs were front-loaded out of the old high-end .224-.236
+  block into .50-.60 (inside the reserved .2-.63 /26). Every bundle `vip:` is a dual
+  provider+metal pair "10.12.4.5x 10.12.8.5x" (D-020). Pre-deploy guard: total provider
+  VIPs=11, all in .50-.60, zero in the stale .10-.20 (phase-01 1.1). Any per-cloud
+  consumer of a VIP (the Horizon reverse proxy, monitoring) must be repointed.
+
+### R15 -- the .10 phantom resolver  (phase-01)
+- Symptom: an unreachable region resolver `10.12.8.10` appears in a node's resolver
+  list (sometimes as Current DNS Server) despite the subnet dns_servers override.
+- Cause: MAAS advertises its region/rack controller as a DNS server on the
+  MAAS-managed metal VLAN, independent of the subnet field; the override does not purge it.
+- Impact: NON-BLOCKING -- systemd-resolved deprioritizes .10 and falls through to .1.
+  Latent fragility if .1 ever drops. Understand/eliminate for Roosevelt (no libvirt split there).
+
+### L1 -- no `set -e` on count-gate blocks; guard greps `|| true`  (phase-01)
+- A guarded `grep -c` printing 0 is a VALID answer, not a failure -- but grep exits 1 on
+  zero matches, so `set -e` aborts the block. Pre-deploy verify blocks run WITHOUT `set -e`, and
+  every count grep ends `|| true`. (`bash -n` would not catch this -- it is behavior.)
+
+### L3 -- metal-side dual-VIP eyeball check  (phase-01)
+- The provider-side VIP guard greps only the first token of each dual `vip:`. The metal
+  side (second token, `10.12.8.5x`) must be eyeballed to confirm all 11 sit in .8.50-.60,
+  clear of metal infra (.8.10 maas / .8.20 lxd / .8.21 capi / .8.30 juju).
+
+================================================================================
+## Vault / secrets (phase-02)
+================================================================================
+
+### DOCFIX-006 -- vault init is one-shot; stdout-only redirect loses the keys  (phase-02)
+- Symptom: `vault operator init ... > file` captures stdout only; if the key block went
+  to stderr (or the run is interrupted) you are left with an unusable/empty file and the
+  5 shares + root token are GONE -- init runs exactly once and cannot be replayed.
+- Fix: `vault operator init -key-shares=5 -key-threshold=3 2>&1 | tee ~/vault-init/init.txt`
+  VERBATIM; gate on `grep -c '^Unseal Key'` printing 5 and `Initial Root Token` present; then
+  save the file OFF-HOST before anything else. Never improvise this command.
+
+### DOCFIX-011 -- authorize-charm parameter is `token`  (phase-02)
+- The vault `authorize-charm` action takes `token` (a direct token string); there is no
+  `token-secret-id` variant in this charm rev. Confirm via `juju actions vault --schema`.
+  Authorize with a SHORT-LIVED CHILD token (juju run persists action params in the op log).
+
+### DOCFIX-014 -- generate-root-ca is required  (phase-02)
+- Symptom: after authorize-charm, vault stays BLOCKED "Missing CA cert".
+- Fix: run `juju run vault/leader generate-root-ca` -- it mints the charm-pki-local
+  root and clears the block straight to active. (Omitting it leaves vault hung.)
+
+### L4 -- vault unseal via hidden prompt, not key-on-argv  (phase-02)
+- Use Vault's own `vault operator unseal` (no argument) so it prompts hidden; the key is
+  never on the command line / in a var / in `ps` / in scrollback. Do NOT use
+  `vault operator unseal $KEY` (visible in `ps` on the unit). Unseal is re-runnable, so
+  the verbatim-reference rule is looser here, but the security gain is real.
+
+### R3 -- "HA Enabled false" is correct for vault-on-mysql  (phase-02)
+- Expected post-unseal: Initialized true / Sealed false / Storage Type mysql /
+  **HA Enabled false**. Single-unit vault on the mysql backend is non-HA by design; any
+  reference to "HA Enabled true (etcd backend)" is STALE (etcd was dropped).
+
+================================================================================
+## Identity / openrc (phase-03)
+================================================================================
+
+### DOCFIX-018 -- IP-only OS_AUTH_URL  (phase-03)
+- This cloud is IP-only (no FQDN, no cloud DNS). The admin openrc must point at the
+  keystone PUBLIC endpoint by IP: `OS_AUTH_URL=https://10.12.4.50:5000/v3`, with the
+  vault root CA in `OS_CACERT` (B5 IP-SAN certs validate). No /etc/hosts, no FQDN.
+
+### DOCFIX-022 -- discover the admin project; do not hardcode it  (phase-03)
+- Symptom: with TLS working, keystone returns HTTP 401.
+- Cause: wrong project scope. The scoping project name varies by charm rev (here it is
+  `admin`, living in domain `admin_domain`; an older doc's `OS_PROJECT_NAME=admin_domain`
+  401s). Credential good, scope wrong.
+- Fix: a candidate loop -- try each of "admin admin_domain"; the first that issues a
+  SCOPED token wins (phase-03 3.2). Costs 2 extra token requests; self-corrects across
+  revs instead of re-introducing the 401-by-hardcode.
+
+================================================================================
+## Octavia enablement (phase-05)
+================================================================================
+
+### L7 -- the openstack snap cannot read /tmp  (phase-05, also phase-01 PKI sanity)
+- Symptom: `openstack image create --file /tmp/...` -> "[Errno 2] No such file or
+  directory" even though `sha256sum` just read the same path.
+- Cause: the openstack CLI snap is confined and cannot read `/tmp`; it CAN read `$HOME`
+  (home interface).
+- Fix: stage any file the snap must read under `$HOME` (e.g. `$HOME/amphora-base/...`),
+  never `/tmp`.
+
+### octavia-configure-resources -- long-running action; o-hm0 transient is normal  (phase-05)
+- `configure-resources` is long-running: juju's default action wait may time out
+  ("timed out waiting for results") while the hook KEEPS RUNNING -- do NOT treat the
+  wait-timeout as failure or re-fire blindly. Use a bounded `--wait` and confirm completion
+  via `juju show-operation <N>` (authoritative), not the streamed log.
+- NORMAL (not faults) during/after: lb-mgmt-net is IPv6-ULA (fc00::/..) by design; a
+  "Virtual network for access to Amphorae is down" transient self-heals as o-hm0 comes
+  up; the lb-mgmt `network:distributed` port shows DOWN (logical OVN port, never chassis-bound).
+
+### amp-image-tag-mismatch -- LP#1937003  (phase-05)
+- Octavia looks up the amphora image by `octavia amp-image-tag`; it MUST equal the tag
+  the retrofit stamps (`octavia-diskimage-retrofit amp-image-tag`), both `octavia-amphora`.
+  A mismatch means octavia cannot find the image even though it is built and ACTIVE.
+  The amphora pipeline gate asserts the two are equal before building (phase-05 5.2).
+
+================================================================================
+## Notes
+================================================================================
+- This index covers phases 00-08. It grows the same way for any future phase: keyed by
+  D-NNN / DOCFIX-NNN / L-N / R-N / named-symptom, each entry symptom -> cause -> fix
+  with a "phase NN" back-reference, and decision rationale left to design-decisions.md.
+- memcached track drift is recorded in appendix-B (B.1), not here (it is a
+  version-lock note, not a troubleshooting entry).
diff --git a/runbooks/appendix-B-asbuilt-version-lock.md b/runbooks/appendix-B-asbuilt-version-lock.md
new file mode 100644
index 0000000..add0350
--- /dev/null
+++ b/runbooks/appendix-B-asbuilt-version-lock.md
@@ -0,0 +1,139 @@
+# Appendix B -- As-Built Version / Channel / Revision Lock
+
+Source: `juju export-bundle` (model `openstack`) + the in-cloud mgmt-cluster
+captures, 2026-06-09. ASCII-only.
+
+POLICY (D-002 + consolidation prompt): the bundle PINS CHANNELS, not revisions.
+This appendix records the as-built REVISIONS as the known-good baseline. A fresh
+deploy resolving a channel to a higher revision than below is EXPECTED -- treat
+this as "last-known-good," verify against Charmhub at pre-flight, and refresh the
+table on a successful validated deploy.
+
+## B.1 Charm channels + as-built revisions
+
+| Application                     | Charm                      | Channel (pinned)   | As-built rev |
+| ------------------------------- | -------------------------- | ------------------ | ------------ |
+| barbican                        | barbican                   | 2024.1/stable      | 209 |
+| barbican-hacluster              | hacluster                  | 2.4/stable         | 131 |
+| barbican-mysql-router           | mysql-router               | 8.0/stable         | 1154 |
+| barbican-vault                  | barbican-vault             | 2024.1/stable      | 75 |
+| ceph-mon                        | ceph-mon                   | squid/stable       | 268 |
+| ceph-osd                        | ceph-osd                   | squid/stable       | 632 |
+| ceph-radosgw                    | ceph-radosgw               | squid/stable       | 600 |
+| ceph-radosgw-hacluster          | hacluster                  | 2.4/stable         | 131 |
+| cinder                          | cinder                     | 2024.1/stable      | 733 |
+| cinder-ceph                     | cinder-ceph                | 2024.1/stable      | 533 |
+| cinder-hacluster                | hacluster                  | 2.4/stable         | 131 |
+| cinder-mysql-router             | mysql-router               | 8.0/stable         | 1154 |
+| dashboard-mysql-router          | mysql-router               | 8.0/stable         | 1136 |
+| glance                          | glance                     | 2024.1/stable      | 642 |
+| glance-hacluster                | hacluster                  | 2.4/stable         | 131 |
+| glance-mysql-router             | mysql-router               | 8.0/stable         | 1154 |
+| glance-simplestreams-sync       | glance-simplestreams-sync  | 2024.1/stable      | 124 |
+| keystone                        | keystone                   | 2024.1/stable      | 778 |
+| keystone-hacluster              | hacluster                  | 2.4/stable         | 131 |
+| keystone-mysql-router           | mysql-router               | 8.0/stable         | 1154 |
+| magnum                          | magnum                     | 2024.1/stable      | 70 |
+| magnum-dashboard                | magnum-dashboard           | 2024.1/stable      | 59 |
+| magnum-hacluster                | hacluster                  | 2.4/stable         | 131 |
+| magnum-mysql-router             | mysql-router               | 8.0/stable         | 1154 |
+| memcached                       | memcached                  | latest/stable      | 39 |
+| mysql-innodb-cluster            | mysql-innodb-cluster       | 8.0/stable         | 159 |
+| ncc-mysql-router                | mysql-router               | 8.0/stable         | 1136 |
+| neutron-api                     | neutron-api                | 2024.1/stable      | 650 |
+| neutron-api-hacluster           | hacluster                  | 2.4/stable         | 131 |
+| neutron-api-mysql-router        | mysql-router               | 8.0/stable         | 1154 |
+| neutron-api-plugin-ovn          | neutron-api-plugin-ovn     | 2024.1/stable      | 178 |
+| nova-cloud-controller           | nova-cloud-controller      | 2024.1/stable      | 795 |
+| nova-cloud-controller-hacluster | hacluster                  | 2.4/stable         | 131 |
+| nova-compute                    | nova-compute               | 2024.1/stable      | 827 |
+| octavia                         | octavia                    | 2024.1/stable      | 441 |
+| octavia-dashboard               | octavia-dashboard          | 2024.1/stable      | 120 |
+| octavia-diskimage-retrofit      | octavia-diskimage-retrofit | 2024.1/stable      | 196 |
+| octavia-hacluster               | hacluster                  | 2.4/stable         | 131 |
+| octavia-mysql-router            | mysql-router               | 8.0/stable         | 1154 |
+| openstack-dashboard             | openstack-dashboard        | 2024.1/stable      | 728 |
+| openstack-dashboard-hacluster   | hacluster                  | 2.4/stable         | 131 |
+| ovn-central                     | ovn-central                | 24.03/stable       | 311 |
+| ovn-chassis                     | ovn-chassis                | 24.03/stable       | 396 |
+| ovn-chassis-octavia             | ovn-chassis                | 24.03/stable       | 396 |
+| placement                       | placement                  | 2024.1/stable      | 125 |
+| placement-hacluster             | hacluster                  | 2.4/stable         | 131 |
+| placement-mysql-router          | mysql-router               | 8.0/stable         | 1154 |
+| rabbitmq-server                 | rabbitmq-server            | 3.9/stable         | 295 |
+| vault                           | vault                      | 1.8/stable         | 372 |
+| vault-mysql-router              | mysql-router               | 8.0/stable         | 1136 |
+
+Notes:
+- memcached is on `latest/stable` (rev 39) -- the only charm not on a versioned
+  track. AT PRE-FLIGHT run `juju info memcached` to list available tracks; if no
+  stable versioned track exists, either pin revision 39 explicitly in the bundle
+  or accept `latest/stable` knowingly. Flagged as a drift candidate.
+- mysql-router subordinates show mixed as-built revisions (most 1154; the
+  ncc/dashboard/vault routers at 1136) on the SAME `8.0/stable` channel. This is
+  benign under channel-pinning (all resolve to current `8.0/stable` on redeploy);
+  recorded only for completeness.
+- EXCLUDED from the bundle: the `k8s` charm (channel `1.32/stable`) deployed on
+  Juju machine 4 / MAAS `capi-mgmt` (10.12.4.100). That is the retired D-033
+  out-of-cloud node, slated for Phase 7 teardown; the in-cloud mgmt cluster
+  (D-035) replaces it. It is intentionally absent here.
+
+## B.2 In-cloud management cluster + CAPI constellation (D-034 / D-035 / D-037)
+
+Node `capi-mgmt-v2` (FIP 10.12.7.40, internal 10.20.0.45), single-node, non-CAPI-managed:
+- k8s-snap: channel `1.32-classic/stable`, rev 5326, k8s v1.32.13 (classic confinement)
+- CAPI core + kubeadm-bootstrap + kubeadm-control-plane: v1.13.2
+- CAPO (infra provider): v0.14.4
+- cert-manager: v1.20.2
+- ORC: v2.5.0   [install BEFORE `clusterctl init` -- CAPO v0.14.4 hard-deps the ORC Image CRD]
+- CAAPH (cluster-api-addon-provider): chart 0.12.0 (`helm --version`, from dependencies.json; deploys image 62f7c00)
+- cluster-api-janitor-openstack: chart 0.11.0 (`helm --version`, from dependencies.json; deploys image d527847)
+- cluster-autoscaler (per-workload): v1.30.4
+- Mgmt CNI: Cilium 1.17.12-ck0. Workload-cluster CNI: Calico (chart default).
+
+VERSION-SOURCE RULE (D-034): every provider ref above is read live from the chosen
+`capi-helm-charts` release tag's `dependencies.json` via `jq`. DO NOT hardcode
+semver in IaC -- this table is a snapshot for redeploy comparison only.
+
+## B.3 Magnum driver + chart (Layer B -- outside Juju channels, manually pinned)
+
+- magnum-capi-helm driver: 1.3.0 was the AS-FIRST-BUILT pin; the v1 TARGET is the
+  RELEASED `magnum-capi-helm==1.4.0` (D-042). 1.3.0 is contract-INCOHERENT with the
+  Layer-A core -- it reads `apiVersion` off the infrastructureRef, which CAPI v1.13
+  (v1beta2 contract) no longer carries, so the driver's `infrastructure` health GET
+  returns "not found" (cosmetic only -- the create path is unaffected; the chart
+  templates resource versions). (1.3.0 also supersedes D-007's `1.1.0` and the late-May
+  `1.2.0` note -- both stale; Review-later: reconcile design-decisions.md.)
+- DRIVER DECISION (D-042, amends D-034): pin the RELEASED `magnum-capi-helm==1.4.0`
+  (the "generalize-api-resources" feature; released line 1.0.0/1.1.0/1.2.0/1.2.1/1.3.0/
+  1.4.0). 1.4.0 resolves each resource query as
+  `api_resources.get(<Kind>,{}).get("api_version", <code-default>)`; the driver's CODE
+  defaults are v1beta1 for the CAPI core kinds, but the `api_resources` OPTION itself
+  defaults to an EMPTY map `{}` (the v1beta1 values are code-level fallbacks, NOT option
+  defaults). CAPI v1.13.2 / CAPO v0.14.4 serve v1beta1, so an empty map yields matching
+  v1beta1 lookups -- set `api_resources = {}` EXPLICITLY (phase-07 7.5: the option's
+  registered default is a dict and the driver calls `json.loads()` on it; an explicit string `{}`
+  avoids the oslo coercion question). Override a kind only if it serves v1beta2-only.
+  Same pin for testcloud and Roosevelt. RULE: the Layer-B
+  driver pin MUST be contract-coherent with the Layer-A CAPI core; verify that
+  intersection at deploy. Install: phase-07 7.3-7.6.
+- chart repo: https://azimuth-cloud.github.io/capi-helm-charts
+- chart name: openstack-cluster ; default_helm_chart_version: 0.25.1
+- conf.d drop-in: /etc/magnum/magnum.conf.d/00-capi-helm.conf (D-037)
+- note (CNI): the `capi-k8s-v1-32` template OMITS the Magnum `network_driver` field, so
+  the workload cluster gets the chart-default Calico (the as-built CNI). Appendix-A
+  (CNI-label) records 1.4.0 as honoring `network_driver` (1.3.0 ignored it); either way
+  it is not relied on -- omitting the field is what guarantees Calico (phase-08).
+- v1 END STATE: 1.4.0 installed and `health_status = HEALTHY` (D-011). 1.3.0 is only a
+  TEMPORARY rollback/holding state (phase-07 Rollback), never a v1 completion. Either
+  way, do NOT wire magnum auto-heal to health_status (CAPI MachineHealthCheck handles
+  healing independently -- proven during the D-040 OOM recovery).
+
+## B.4 Pre-flight checklist (redeploy)
+
+1. `scripts/pre-flight-checks.sh` -- verify every channel above still resolves on Charmhub.
+2. `juju info memcached` -- confirm track decision (see B.1 note).
+3. Read CAPI constellation live from `dependencies.json` (D-034); compare to B.2.
+4. Driver (D-042): pin the RELEASED `magnum-capi-helm==1.4.0` (contract-coherent with the
+   Layer-A CAPI core; an empty `api_resources` map falls back to the code-default v1beta1, which CAPI v1.13.2 serves).
+   Confirm 1.4.0 still resolves on PyPI and that the cluster serves v1beta1 (phase-07 7.3).
diff --git a/runbooks/deprecated/00-pre-deploy.md b/runbooks/deprecated/00-pre-deploy.md
deleted file mode 100644
index aa07d86..0000000
--- a/runbooks/deprecated/00-pre-deploy.md
+++ /dev/null
@@ -1,142 +0,0 @@
-# Runbook 00 — Pre-Deploy
-
-## Purpose
-
-Prepare for a clean Caracal rebuild of the VR0 DC0 Omega Cloud. Capture all
-state needed for rollback, gracefully tear down dependent workloads, and verify
-the destination environment is ready before destroying the existing OpenStack
-model.
-
-## Prerequisites
-
-- SSH access to jumphost `vopenstack-jesse` as `jessea123`
-- `admin-openrc` and `user1-openrc` available in `$HOME`
-- Access to the Juju controller hosting the `openstack` model
-- Access to the capi-mgmt.maas k3s cluster (kubeconfig present)
-- NetBox IPv4 imports completed (per `netbox/ipv4-prefixes-import.py`)
-- NetBox VLAN imports completed (per `netbox/vlans-import.py`)
-
-## Phase 1 — Verify NetBox readiness (gating)
-
-Run the verification path of the NetBox import scripts. Confirm all entries
-appear correctly scoped to VR0 DC0.
-
-```bash
-cd ~/vr0-dc0-caracal
-NETBOX_URL=https://netbox.baldurkeep.com NETBOX_TOKEN=<token> \
-  python3 netbox/ipv4-prefixes-import.py --verify-only
-NETBOX_URL=https://netbox.baldurkeep.com NETBOX_TOKEN=<token> \
-  python3 netbox/vlans-import.py --verify-only
-```
-
-Expected: all prefixes and VLANs report scope-OK, no MISSING entries.
-
-## Phase 2 — Capture current state
-
-Backups needed for potential rollback:
-
-```bash
-# Vault unseal keys and root CA cert
-juju ssh vault/0 -- sudo cat /var/snap/vault/common/vault.crt > ~/backups/$(date +%F)/vault-root-ca.crt
-# (Unseal keys MUST be on file from initial Vault setup; verify presence)
-ls -la ~/.vault-keys
-
-# Export current bundle
-juju export-bundle --model openstack > ~/backups/$(date +%F)/bundle-pre-rebuild.yaml
-
-# Snapshot of current 'juju status'
-juju status --model openstack --format=yaml > ~/backups/$(date +%F)/juju-status-pre-rebuild.yaml
-
-# Inventory of FIPs and tenant resources we might want to recreate
-source ~/admin-openrc
-openstack floating ip list -c "Floating IP Address" -c "Fixed IP Address" \
-  -c "Project" -f csv > ~/backups/$(date +%F)/floating-ips.csv
-openstack server list --all-projects -c ID -c Name -c Project -c Status -f csv \
-  > ~/backups/$(date +%F)/servers.csv
-openstack network list --all-projects -c ID -c Name -c Project -f csv \
-  > ~/backups/$(date +%F)/networks.csv
-openstack loadbalancer list -c id -c name -c project_id -c vip_address -f csv \
-  > ~/backups/$(date +%F)/loadbalancers.csv
-```
-
-## Phase 3 — KVM snapshots of openstack0-3
-
-From the jumphost (which is the hypervisor):
-
-```bash
-for vm in openstack0 openstack1 openstack2 openstack3; do
-  sudo virsh snapshot-create-as --domain "$vm" \
-    --name "pre-caracal-rebuild-$(date +%F)" \
-    --description "Pre-Caracal rebuild baseline" \
-    --atomic
-done
-sudo virsh snapshot-list openstack0
-```
-
-These snapshots are the disaster-recovery point.
-
-## Phase 4 — Graceful CAPI workload teardown (D-013)
-
-Delete the CAPI workload cluster cleanly so its OpenStack resources (LBs, FIPs,
-volumes, Octavia members) are released by CAPI controllers before model destroy.
-
-```bash
-export KUBECONFIG=~/magnum-capi/phase3/capi-mgmt-cluster.kubeconfig
-# (Adjust path if kubeconfig has moved)
-
-# Delete the workload cluster — CAPI handles tenant OpenStack cleanup
-kubectl delete cluster capi-mgmt-cluster -n default
-# Wait for finalizers; this may take ~10 minutes
-kubectl wait --for=delete cluster/capi-mgmt-cluster -n default --timeout=15m
-```
-
-Verify on the OpenStack side that resources were released:
-
-```bash
-source ~/admin-openrc
-openstack server list --all-projects | grep -i capi || echo "No CAPI servers remaining"
-openstack loadbalancer list | grep -i capi || echo "No CAPI LBs remaining"
-openstack floating ip list -c "Floating IP Address" -c "Fixed IP Address" -f csv
-```
-
-## Phase 5 — Preserve capi-mgmt.maas itself
-
-The bootstrap k3s + CAPI controllers on `capi-mgmt.maas` are NOT destroyed —
-they will be re-used post-rebuild as the Magnum CAPI mgmt plane. Verify the
-controllers are still healthy:
-
-```bash
-ssh capi-mgmt.maas -- sudo kubectl --kubeconfig /etc/rancher/k3s/k3s.yaml \
-  get pods -A
-```
-
-Confirm:
-- `capi-system` namespace pods Running
-- `capo-system` (CAPI OpenStack provider) pods Running
-- `cert-manager` pods Running
-- `orc-system` (OpenStack Resource Controller) pods Running
-
-## Phase 6 — Final go/no-go checklist
-
-Do not proceed to `runbooks/01-destroy-model.md` until all of the following pass:
-
-- [ ] NetBox verification clean
-- [ ] Vault unseal keys backed up and verified readable
-- [ ] `bundle-pre-rebuild.yaml` exists and is non-empty
-- [ ] `juju-status-pre-rebuild.yaml` shows desired-pre-destroy state captured
-- [ ] All four KVM snapshots created (`virsh snapshot-list` confirms)
-- [ ] CAPI workload cluster deletion completed (`kubectl get cluster` returns
-      "no resources found")
-- [ ] OpenStack-side resources from CAPI workload are released (no orphaned LBs,
-      FIPs, volumes)
-- [ ] capi-mgmt.maas k3s cluster controllers all Running
-
-## Notes
-
-- Snapshot disk space consumption can grow significantly during the rebuild
-  window. Verify free space on `/var/lib/libvirt/images` prior to running
-  the rebuild deploy.
-- If Vault unseal keys cannot be located, STOP. A failed Vault re-init without
-  the original keys means lost issued certificates and is destructive to any
-  data sealed under the existing root key. This MUST be confirmed before model
-  destroy.
diff --git a/runbooks/deprecated/01a-octavia-pki-generation.md b/runbooks/deprecated/01a-octavia-pki-generation.md
deleted file mode 100644
index 65bd707..0000000
--- a/runbooks/deprecated/01a-octavia-pki-generation.md
+++ /dev/null
@@ -1,650 +0,0 @@
-# Runbook 01a — Octavia LBaaS PKI generation
-
-**Status:** Pre-deploy execution. Runs between `01-destroy-model.md` and `02-deploy.md`.
-**Numbering rationale:** Octavia PKI artifacts must exist on the deploy host before
-`juju deploy` is invoked (the values are referenced by the overlay file). Placing
-this between destroy and deploy aligns generation with the "fresh rebuild" framing.
-
-**Cross-references:**
-- D-007 (Octavia in bundle from day one)
-- Bundle `octavia.options` PKI material section
-- `overlays/octavia-pki.yaml` (gitignored — output of this runbook)
-- Workstream 3a decision (2026-05-22): generate fresh, EC P-384 CAs, overlay-file approach
-
----
-
-## 1. Purpose & scope
-
-This runbook generates a complete two-tier PKI for Charmed Octavia's
-amphora load-balancer trust domain:
-
-- **Issuing CA** — Octavia uses this to sign each amphora's server certificate
-  at LB-creation time. Octavia receives the **private key** and **passphrase**.
-- **Controller CA** — amphorae's trust anchor for connections **from** the
-  Octavia controller. Octavia only receives the **cert** (no key needed at
-  runtime); the controller's identity is proved by:
-- **Controller certificate** — signed by Controller CA, presented by the
-  Octavia controller to each amphora. Bundled as cert + key into a single
-  PEM blob.
-
-Five charm options consume the artifacts (`octavia` application):
-
-| Charm option | Content | Format |
-|---|---|---|
-| `lb-mgmt-issuing-cacert` | Issuing CA certificate | base64-encoded PEM |
-| `lb-mgmt-issuing-ca-private-key` | Issuing CA encrypted private key | base64-encoded PEM (already encrypted with passphrase) |
-| `lb-mgmt-issuing-ca-key-passphrase` | Issuing CA key passphrase | plain string (NOT base64) |
-| `lb-mgmt-controller-cacert` | Controller CA certificate | base64-encoded PEM |
-| `lb-mgmt-controller-cert` | Controller cert + key, concatenated | base64-encoded PEM bundle |
-
-**Scope:** v1 testcloud (VR0 DC0 Omega Cloud). Roosevelt deltas documented in
-section 16.
-
-**Out of scope:** Octavia API TLS (issued by Vault via `octavia:certificates`
-relation); rotation procedure (deferred to Roosevelt runbook).
-
----
-
-## 2. Decisions captured
-
-Per workstream 3a sign-off (2026-05-22):
-
-| Decision | Choice | Roosevelt parallel |
-|---|---|---|
-| Cert provenance | Generate fresh (no Bobcat-backup copy) | Vault PKI engine |
-| CA key algorithm | EC P-384 | EC P-384 (Vault root) |
-| Controller cert algorithm | EC P-256 | EC P-256 |
-| CA validity | 10 years | 5-year intermediate, Vault-rotated |
-| Controller cert validity | 2 years | 90 days, auto-rotated |
-| Distribution method | Juju overlay file (gitignored) | Vault-injected at deploy |
-| Storage path on jumphost | `$HOME/octavia-pki/` | Vault PKI mounts |
-| Passphrase strength | 32 random bytes, base64-encoded (44 chars) | Vault-generated |
-
-**Naming convention:**
-
-- Issuing CA CN: `VR0 DC0 Omega Cloud Octavia Issuing CA`
-- Controller CA CN: `VR0 DC0 Omega Cloud Octavia Controller CA`
-- Controller cert CN: `octavia-controller.omega.dc0.vr0.cloud.neumatrix.local`
-- Controller cert SANs: above CN, plus `octavia.omega.dc0.vr0.cloud.neumatrix.local`, plus `10.12.4.233` (the Octavia API VIP per workstream 2)
-- Organization (O): `Neumatrix`
-
----
-
-## 3. Prerequisites
-
-- Executor is on jumphost `vopenstack-jesse` as `jessea123`.
-- `openssl` version 3.x or later installed (`openssl version` to confirm).
-- `$HOME` is writable (snap-confined `openstackclients` cannot read `/tmp`;
-  all paths must resolve under `$HOME`).
-- Git repository `openstack-caracal-ipv4` cloned on jumphost at a known path
-  (referred to as `$REPO` throughout). Set this in the executor's shell:
-  ```bash
-  export REPO=$HOME/repos/openstack-caracal-ipv4   # adjust to actual clone path
-  ```
-- Repository is on `main` branch and clean (`cd $REPO && git status` shows clean tree).
-- Previous workstream 2 commit has been pushed (bundle has the VIP assignments and
-  active hacluster stack — verify with `grep -c "^      vip: 10.12.4." "$REPO/bundle.yaml"`,
-  expect 12).
-
----
-
-## 4. Pre-flight: gitignore patch (DO THIS FIRST)
-
-**Critical:** the `.gitignore` patch goes in BEFORE any private key material
-exists on disk. This minimizes the race window for an accidental commit.
-
-```bash
-cd "$REPO"
-
-# Append to .gitignore (idempotent — check if already present first)
-grep -q "octavia-pki.yaml" .gitignore || cat >> .gitignore <<'EOF'
-
-# Octavia PKI artifacts — NEVER commit
-overlays/octavia-pki.yaml
-octavia-pki/
-*.key
-*.key.enc
-passphrase.txt
-EOF
-
-# Review the diff
-git diff .gitignore
-
-# Commit and push BEFORE generating any keys
-git add .gitignore
-git commit -m "gitignore: octavia PKI artifacts and overlay (runbook 01a)"
-git push origin main
-```
-
-**Verify the gitignore is effective:**
-
-```bash
-# This should NOT show overlays/octavia-pki.yaml even as untracked
-touch overlays/octavia-pki.yaml
-git status --short overlays/  # expect: empty output for octavia-pki.yaml
-rm overlays/octavia-pki.yaml
-```
-
-If the test file does show as untracked, **STOP** and fix the gitignore syntax before
-generating any secrets.
-
----
-
-## 5. Workspace setup
-
-```bash
-WORKDIR=$HOME/octavia-pki
-mkdir -p "$WORKDIR"/{issuing-ca,controller-ca,controller,overlay-build}
-chmod 700 "$WORKDIR"
-cd "$WORKDIR"
-echo "Working in: $WORKDIR"
-```
-
-Resulting layout:
-
-```
-$HOME/octavia-pki/
-├── issuing-ca/           # passphrase.txt, .key.enc, .cert.pem
-├── controller-ca/        # passphrase.txt, .key.enc, .cert.pem
-├── controller/           # .key, .csr, .cert.pem, .bundle.pem, .cnf
-└── overlay-build/        # base64 intermediates → written by step 10, consumed by step 11
-```
-
----
-
-## 6. Generate Issuing CA
-
-EC P-384 key encrypted with random 32-byte passphrase. Self-signed cert, 10y validity.
-
-```bash
-cd "$WORKDIR/issuing-ca"
-
-# Generate passphrase (no trailing newline — required for clean YAML embedding)
-openssl rand -base64 32 | tr -d '\n' > passphrase.txt
-chmod 600 passphrase.txt
-
-# Sanity-check
-test $(wc -c < passphrase.txt) -eq 44 || { echo "ERROR: passphrase length wrong"; exit 1; }
-
-# Generate EC P-384 private key, encrypted with passphrase
-openssl genpkey -algorithm EC \
-  -pkeyopt ec_paramgen_curve:P-384 \
-  -aes-256-cbc \
-  -pass file:passphrase.txt \
-  -out issuing-ca.key.enc
-chmod 600 issuing-ca.key.enc
-
-# Self-sign cert (10 years, SHA-384)
-openssl req -new -x509 -sha384 \
-  -key issuing-ca.key.enc \
-  -passin file:passphrase.txt \
-  -days 3650 \
-  -subj "/CN=VR0 DC0 Omega Cloud Octavia Issuing CA/O=Neumatrix" \
-  -out issuing-ca.cert.pem
-
-# Verify
-openssl x509 -in issuing-ca.cert.pem -noout -dates -subject
-openssl verify -CAfile issuing-ca.cert.pem issuing-ca.cert.pem
-# Expect: issuing-ca.cert.pem: OK
-
-ls -la
-```
-
----
-
-## 7. Generate Controller CA
-
-Identical pattern; different CN.
-
-```bash
-cd "$WORKDIR/controller-ca"
-
-openssl rand -base64 32 | tr -d '\n' > passphrase.txt
-chmod 600 passphrase.txt
-test $(wc -c < passphrase.txt) -eq 44 || { echo "ERROR: passphrase length wrong"; exit 1; }
-
-openssl genpkey -algorithm EC \
-  -pkeyopt ec_paramgen_curve:P-384 \
-  -aes-256-cbc \
-  -pass file:passphrase.txt \
-  -out controller-ca.key.enc
-chmod 600 controller-ca.key.enc
-
-openssl req -new -x509 -sha384 \
-  -key controller-ca.key.enc \
-  -passin file:passphrase.txt \
-  -days 3650 \
-  -subj "/CN=VR0 DC0 Omega Cloud Octavia Controller CA/O=Neumatrix" \
-  -out controller-ca.cert.pem
-
-openssl x509 -in controller-ca.cert.pem -noout -dates -subject
-openssl verify -CAfile controller-ca.cert.pem controller-ca.cert.pem
-# Expect: controller-ca.cert.pem: OK
-```
-
-**Why Controller CA's key is encrypted even though Octavia never uses it:**
-The Controller CA key is needed for future rotations of the controller cert.
-Encrypting it (with its own passphrase, separate from Issuing CA's) is defense
-in depth — if the jumphost is compromised, the key still requires the
-passphrase to be useful for forging controller certs.
-
----
-
-## 8. Generate Controller certificate
-
-EC P-256 key (no encryption — Octavia must read it at startup), CSR with SAN
-extensions, signed by Controller CA, 2y validity.
-
-```bash
-cd "$WORKDIR/controller"
-
-# Generate unencrypted EC P-256 key
-openssl genpkey -algorithm EC \
-  -pkeyopt ec_paramgen_curve:P-256 \
-  -out controller.key
-chmod 600 controller.key
-
-# CSR config with SAN extensions
-cat > controller.cnf <<'EOF'
-[req]
-distinguished_name = req_distinguished_name
-req_extensions = v3_req
-prompt = no
-
-[req_distinguished_name]
-CN = octavia-controller.omega.dc0.vr0.cloud.neumatrix.local
-O = Neumatrix
-
-[v3_req]
-keyUsage = critical, digitalSignature, keyEncipherment
-extendedKeyUsage = clientAuth, serverAuth
-subjectAltName = @alt_names
-
-[alt_names]
-DNS.1 = octavia-controller.omega.dc0.vr0.cloud.neumatrix.local
-DNS.2 = octavia.omega.dc0.vr0.cloud.neumatrix.local
-IP.1 = 10.12.4.233
-EOF
-
-# Generate CSR
-openssl req -new -sha256 \
-  -key controller.key \
-  -config controller.cnf \
-  -out controller.csr
-
-# Sign with Controller CA (2 years)
-openssl x509 -req -sha256 \
-  -in controller.csr \
-  -CA "$WORKDIR/controller-ca/controller-ca.cert.pem" \
-  -CAkey "$WORKDIR/controller-ca/controller-ca.key.enc" \
-  -passin file:"$WORKDIR/controller-ca/passphrase.txt" \
-  -CAcreateserial \
-  -days 730 \
-  -extfile controller.cnf \
-  -extensions v3_req \
-  -out controller.cert.pem
-
-# Bundle cert + key (the lb-mgmt-controller-cert option expects both in one PEM)
-cat controller.cert.pem controller.key > controller.bundle.pem
-chmod 600 controller.bundle.pem
-```
-
-**Verify the chain and SAN:**
-
-```bash
-# Chain verifies
-openssl verify -CAfile "$WORKDIR/controller-ca/controller-ca.cert.pem" controller.cert.pem
-# Expect: controller.cert.pem: OK
-
-# SAN extensions present
-openssl x509 -in controller.cert.pem -noout -ext subjectAltName
-# Expect:
-#     DNS:octavia-controller.omega.dc0.vr0.cloud.neumatrix.local,
-#     DNS:octavia.omega.dc0.vr0.cloud.neumatrix.local,
-#     IP Address:10.12.4.233
-
-# Validity
-openssl x509 -in controller.cert.pem -noout -dates
-# Expect: notAfter ~2 years from today
-
-# Bundle integrity (cert + key match)
-openssl x509 -in controller.bundle.pem -noout -pubkey > /tmp/cert.pub
-openssl pkey -in controller.bundle.pem -pubout > /tmp/key.pub
-diff /tmp/cert.pub /tmp/key.pub && echo "Bundle cert/key match"
-rm /tmp/cert.pub /tmp/key.pub
-```
-
----
-
-## 9. Final chain verification
-
-A standalone block to confirm the full chain is sound before consuming for Octavia:
-
-```bash
-cd "$WORKDIR"
-
-echo "=== Issuing CA ==="
-openssl x509 -in issuing-ca/issuing-ca.cert.pem -noout -subject -dates
-openssl verify -CAfile issuing-ca/issuing-ca.cert.pem issuing-ca/issuing-ca.cert.pem
-
-echo ""
-echo "=== Controller CA ==="
-openssl x509 -in controller-ca/controller-ca.cert.pem -noout -subject -dates
-openssl verify -CAfile controller-ca/controller-ca.cert.pem controller-ca/controller-ca.cert.pem
-
-echo ""
-echo "=== Controller cert ==="
-openssl x509 -in controller/controller.cert.pem -noout -subject -dates
-openssl verify -CAfile controller-ca/controller-ca.cert.pem controller/controller.cert.pem
-```
-
-All three "verify" lines must show `: OK`. If any do not, **STOP** and investigate
-before proceeding.
-
----
-
-## 10. Base64-encode artifacts
-
-Each base64 file is a single line (no wrapping); each becomes one YAML value.
-
-```bash
-cd "$WORKDIR/overlay-build"
-
-# Issuing CA cert (base64)
-base64 -w0 "$WORKDIR/issuing-ca/issuing-ca.cert.pem" > issuing-cacert.b64
-
-# Issuing CA private key (already encrypted PEM → base64)
-base64 -w0 "$WORKDIR/issuing-ca/issuing-ca.key.enc" > issuing-ca-private-key.b64
-
-# Controller CA cert
-base64 -w0 "$WORKDIR/controller-ca/controller-ca.cert.pem" > controller-cacert.b64
-
-# Controller cert + key bundle
-base64 -w0 "$WORKDIR/controller/controller.bundle.pem" > controller-cert.b64
-
-# Sanity-check sizes (expect 500-2000 chars each)
-wc -c *.b64
-```
-
----
-
-## 11. Assemble the overlay file
-
-```bash
-# Read each artifact into shell variables
-ISSUING_CACERT=$(cat "$WORKDIR/overlay-build/issuing-cacert.b64")
-ISSUING_CA_KEY=$(cat "$WORKDIR/overlay-build/issuing-ca-private-key.b64")
-ISSUING_CA_PASS=$(cat "$WORKDIR/issuing-ca/passphrase.txt")
-CONTROLLER_CACERT=$(cat "$WORKDIR/overlay-build/controller-cacert.b64")
-CONTROLLER_CERT=$(cat "$WORKDIR/overlay-build/controller-cert.b64")
-
-# Assemble overlay (note: passphrase is YAML-quoted; cert blobs are not — they're
-# guaranteed-safe base64 without special chars)
-mkdir -p "$REPO/overlays"
-cat > "$REPO/overlays/octavia-pki.yaml" <<EOF
-# Octavia LBaaS PKI overlay — SENSITIVE — NEVER COMMIT
-# Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) UTC
-# Source: runbooks/01a-octavia-pki-generation.md
-# Issuing CA, Controller CA, Controller cert all generated fresh per workstream 3a.
-#
-# This file is gitignored. If you see it staged or committed, .gitignore is broken.
-
-applications:
-  octavia:
-    options:
-      lb-mgmt-issuing-cacert: ${ISSUING_CACERT}
-      lb-mgmt-issuing-ca-private-key: ${ISSUING_CA_KEY}
-      lb-mgmt-issuing-ca-key-passphrase: "${ISSUING_CA_PASS}"
-      lb-mgmt-controller-cacert: ${CONTROLLER_CACERT}
-      lb-mgmt-controller-cert: ${CONTROLLER_CERT}
-EOF
-
-chmod 600 "$REPO/overlays/octavia-pki.yaml"
-
-# Unset the shell variables (they held key material)
-unset ISSUING_CACERT ISSUING_CA_KEY ISSUING_CA_PASS CONTROLLER_CACERT CONTROLLER_CERT
-```
-
-**Validate the overlay parses as YAML:**
-
-```bash
-python3 -c "import yaml; d = yaml.safe_load(open('$REPO/overlays/octavia-pki.yaml')); \
-  o = d['applications']['octavia']['options']; \
-  print('Keys present:', sorted(o.keys())); \
-  print('All values non-empty:', all(v for v in o.values()))"
-# Expect: 5 keys listed; "All values non-empty: True"
-```
-
-**Confirm gitignore is doing its job:**
-
-```bash
-cd "$REPO"
-git status --short
-# overlays/octavia-pki.yaml MUST NOT appear here
-# If it does — STOP, shred the file, fix .gitignore, regenerate
-```
-
----
-
-## 12. Bundle.yaml housekeeping
-
-The `octavia` application in `bundle.yaml` still has commented placeholder lines
-for the 5 PKI options plus the `TODO(octavia-cert):` block. These should be
-removed and replaced with a pointer to the overlay.
-
-**Replace this block in `bundle.yaml`** (inside `octavia.options:`):
-
-```yaml
-      # ----- PKI material (4 cert blobs + passphrase) ---------------------
-      # TODO(octavia-cert): inline values BEFORE deploy. Two sources:
-      #   (a) Copy from Bobcat backup at:
-      #       ~/backups/pre-caracal-destroy-2026-05-22/bundle-pre-destroy.yaml
-      #       (lines ~230-234; CA valid until 2027-05-15 — adequate for testcloud)
-      #   (b) Generate fresh via the (yet-to-be-written) octavia-cert-runbook
-      #       — required for Roosevelt deploy
-      # lb-mgmt-controller-cacert: <base64 PEM>
-      # lb-mgmt-controller-cert: <base64 PEM cert + key>
-      # lb-mgmt-issuing-ca-key-passphrase: <passphrase string>
-      # lb-mgmt-issuing-ca-private-key: <base64 encrypted PEM>
-      # lb-mgmt-issuing-cacert: <base64 PEM>
-```
-
-**With this block:**
-
-```yaml
-      # ----- PKI material -------------------------------------------------
-      # 5 lb-mgmt-* options are supplied via overlays/octavia-pki.yaml
-      # (gitignored). Generated per runbooks/01a-octavia-pki-generation.md.
-      # Deploy with:
-      #   juju deploy ./bundle.yaml \
-      #     --overlay overlays/vr0-dc0-testcloud.yaml \
-      #     --overlay overlays/octavia-pki.yaml
-```
-
-Commit this bundle change separately from the overlay generation work:
-
-```bash
-cd "$REPO"
-git diff bundle.yaml
-git add bundle.yaml
-git commit -m "bundle: octavia PKI moves to overlay (runbook 01a)
-
-Remove inline placeholders + TODO(octavia-cert) block. PKI values now
-supplied via overlays/octavia-pki.yaml (gitignored), generated per
-runbooks/01a-octavia-pki-generation.md. Decision per workstream 3a
-(2026-05-22): industry-best-practice secret handling on testcloud
-to rehearse Roosevelt's Vault-PKI-backed posture."
-git push origin main
-```
-
----
-
-## 13. Sensitive-file backup
-
-The Issuing CA private key + its passphrase are the crown jewels of the LB trust
-domain. Loss → cannot sign new amphora certs (LBs gradually break). Exposure →
-attacker can forge amphora identities and intercept tenant LB traffic.
-
-**Minimum backup for testcloud:**
-
-```bash
-cd $HOME
-BACKUP_NAME="octavia-pki-backup-$(date +%Y%m%d-%H%M%S).tar.gz"
-
-tar -czf "$BACKUP_NAME" -C $HOME octavia-pki/
-
-# Encrypt with strong symmetric cipher
-gpg --symmetric --cipher-algo AES256 --output "${BACKUP_NAME}.gpg" "$BACKUP_NAME"
-
-# Shred the unencrypted tar
-shred -uvz "$BACKUP_NAME"
-
-ls -la "${BACKUP_NAME}.gpg"
-```
-
-**Move `${BACKUP_NAME}.gpg` off-host** (your decision — admin workstation
-encrypted drive, password-manager attachment, dedicated secrets vault, etc.).
-Do NOT leave it sitting in $HOME on the jumphost long-term — that's a single
-point of compromise.
-
-**Roosevelt note:** Vault PKI engine stores all of this — no manual backup
-required; Vault's own backup mechanism covers it. The procedure above is
-testcloud-only.
-
----
-
-## 14. Cleanup of intermediates
-
-After successful deploy + verification (section 15), shred files that are not
-needed for future rotation:
-
-```bash
-# Optional: shred the base64 intermediates (regeneratable from PEM sources)
-shred -uvz "$WORKDIR/overlay-build/"*.b64
-rmdir "$WORKDIR/overlay-build"
-
-# Optional: shred the CSR (regeneratable if needed)
-shred -uvz "$WORKDIR/controller/controller.csr"
-
-# DO NOT shred any of the following — they are needed for future operations:
-#   - issuing-ca/{issuing-ca.cert.pem, issuing-ca.key.enc, passphrase.txt}
-#   - controller-ca/{controller-ca.cert.pem, controller-ca.key.enc, passphrase.txt}
-#   - controller/{controller.key, controller.cert.pem, controller.bundle.pem, controller.cnf}
-#
-# Specifically:
-#   - Issuing CA artifacts: required for signing new amphoras (Octavia uses them runtime)
-#   - Controller CA artifacts: required for signing new controller certs (rotation)
-#   - Controller cert/key: required to repopulate the overlay if jumphost is rebuilt
-```
-
----
-
-## 15. Post-deploy verification
-
-After `runbooks/02-deploy.md` completes (`juju deploy` with the overlay),
-verify Octavia is healthy and the PKI plumbing works.
-
-```bash
-# Octavia charm active/idle
-juju status octavia
-# Expect: octavia/0 active idle
-
-# Octavia services running
-juju ssh octavia/0 -- sudo systemctl is-active octavia-api octavia-worker octavia-housekeeping
-# Expect: 3x "active"
-
-# Confirm PKI files landed on the unit
-juju ssh octavia/0 -- sudo ls -la /etc/octavia/certs/
-# Expect: server_ca.cert.pem, server_ca.key.pem, client_ca.cert.pem, client.cert-and-key.pem
-# (filenames are charm-controlled; presence is what matters)
-
-# Confirm Octavia can use them — verbose health-check from the API
-juju ssh octavia/0 -- sudo journalctl -u octavia-api --since "5 minutes ago" \
-  | grep -iE "(cert|ssl|tls|amphora)" | head -20
-# Expect: no errors related to cert loading
-```
-
-**Smoketest — create a test LB once amphora image is available:**
-
-```bash
-# After `octavia-diskimage-retrofit` has populated Glance with the amphora image,
-# and the LBaaS Mgmt network is wired (these are downstream runbook steps),
-# a test LB creation exercises the full PKI chain:
-
-source ~/admin-openrc
-openstack loadbalancer create --name pki-smoketest --vip-subnet-id <provider-subnet>
-
-# Watch for amphora spawn (3-5 minutes typical)
-watch -n5 'openstack loadbalancer show pki-smoketest'
-# Wait for: provisioning_status=ACTIVE, operating_status=ONLINE
-
-# Octavia-worker log should show successful amphora handshake (signed by Issuing CA,
-# trusted via Controller CA):
-juju ssh octavia/0 -- sudo journalctl -u octavia-worker --since "10 minutes ago" \
-  | grep -iE "(amphora|cert)" | tail -20
-# Expect: "amphora <UUID> connection established" or similar
-# Expect: no TLS handshake errors, no cert validation errors
-
-# Cleanup the smoketest LB
-openstack loadbalancer delete pki-smoketest --cascade
-```
-
-If amphora handshake fails with cert errors, the most likely causes are:
-
-1. SAN mismatch — the controller's connection to amphora uses the cert's CN/SAN;
-   verify the controller cert SAN covers all addresses Octavia uses to reach amphorae.
-2. Bundle/key mismatch — `lb-mgmt-controller-cert` bundle should contain BOTH the
-   cert and the matching private key; if they're for different keys, handshake fails.
-3. Encrypted Issuing CA key + wrong passphrase — verify the passphrase string in
-   the overlay matches what was used at generation.
-
----
-
-## 16. Roosevelt deltas (forward-look)
-
-When this runbook is adapted for Roosevelt bare-metal deploy:
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Issuing CA root | Self-signed | Intermediate signed by Vault root CA |
-| CA storage | Filesystem on jumphost | Vault PKI engine, encrypted at rest |
-| Controller cert validity | 2 years | 90 days |
-| Rotation | Manual (this runbook re-run) | Automated via Vault + cron + bundle redeploy |
-| Backup | gpg tarball, off-host | Vault's own backup mechanism |
-| Amphora image signing | Out of scope for v1 | Image signed by Vault PKI as well |
-| Procedure file | `runbooks/01a-octavia-pki-generation.md` | New runbook in Roosevelt repo |
-
-The procedure structure (generate Issuing CA → Controller CA → Controller cert →
-encode → overlay → backup → deploy) remains identical. Roosevelt just sources
-the CA root from Vault instead of self-signing.
-
----
-
-## 17. Rotation/renewal pointer
-
-For testcloud, the 2-year controller cert and 10-year CAs are intentionally
-"set and forget" — they will outlive the cloud at this scale.
-
-If rotation IS needed before testcloud teardown (e.g., a key leak event), the
-re-run procedure is:
-
-1. Generate new Controller cert signed by **existing** Controller CA (re-run
-   sections 8-9 only).
-2. Re-encode the artifacts (section 10) and regenerate the overlay (section 11)
-   with the new Controller cert; leave all other values unchanged.
-3. `juju config octavia lb-mgmt-controller-cert=<new-base64>` (single-option
-   update; does not require full bundle redeploy).
-4. Octavia services may need a restart: `juju ssh octavia/0 -- sudo systemctl restart octavia-api octavia-worker octavia-housekeeping`.
-5. Existing amphorae will need to reconnect using the new cert; in-flight LBs
-   may briefly drop. This is acceptable for a security-event rotation.
-
-For Roosevelt, this whole procedure is replaced by Vault automated rotation —
-see Roosevelt runbook (TBD).
-
----
-
-## 18. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-22 | Document created. Fresh-generate, EC P-384 CAs, EC P-256 controller cert, overlay-file distribution. | Workstream 3a |
diff --git a/runbooks/deprecated/02-deploy.md b/runbooks/deprecated/02-deploy.md
deleted file mode 100644
index 4a52845..0000000
--- a/runbooks/deprecated/02-deploy.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Runbook 02 — Deploy New Caracal Bundle
-
-**STATUS: PLACEHOLDER** — drafted alongside bundle.yaml.
-
-## Purpose
-
-Deploy the new Charmed OpenStack Caracal bundle and wait for the cloud to
-settle in `active/idle`.
-
-## Prerequisites
-
-- Runbook 01 complete (model destroyed, MAAS state clean)
-- `bundle.yaml` and `overlays/vr0-dc0-testcloud.yaml` drafted and reviewed
-- `scripts/pre-flight-checks.sh` passes
-
-## TODO
-
-- [ ] `juju add-model openstack`
-- [ ] `juju deploy ./bundle.yaml --overlay overlays/vr0-dc0-testcloud.yaml --trust`
-- [ ] Wait for settle (`juju-wait` or `juju status --watch 30s`)
-- [ ] Pause-points for Vault init (per Runbook 03)
-- [ ] Acceptance: all charms `active/idle` modulo Vault (sealed) and any
-      charms waiting on Vault certificates
diff --git a/runbooks/deprecated/03-vault-init.md b/runbooks/deprecated/03-vault-init.md
deleted file mode 100644
index 40db379..0000000
--- a/runbooks/deprecated/03-vault-init.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Runbook 03 — Vault Initialization
-
-**STATUS: PLACEHOLDER** — drafted during deploy phase.
-
-## Purpose
-
-Initialize the Vault instance(s), unseal, authorize, and let certificate
-relations resolve so dependent charms reach `active/idle`.
-
-## Prerequisites
-
-- Bundle deployed; Vault charm in `blocked` waiting for init
-- etcd cluster in `active/idle` (Vault HA backend per D-006)
-- easyrsa active (TLS bootstrap)
-
-## TODO
-
-- [ ] `juju run vault/leader generate-root-ca` — capture root CA cert
-- [ ] `vault operator init -key-shares=5 -key-threshold=3` — capture keys
-- [ ] Unseal with 3 of 5 keys
-- [ ] `juju run vault/leader authorize-charm token=<root-token>`
-- [ ] Verify all `:certificates` relations complete (no charms stuck
-      waiting on certs)
-- [ ] Store unseal keys in `~/.vault-keys/` (chmod 600); back up
diff --git a/runbooks/deprecated/04-magnum-domain.md b/runbooks/deprecated/04-magnum-domain.md
deleted file mode 100644
index b400a18..0000000
--- a/runbooks/deprecated/04-magnum-domain.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Runbook 04 — Magnum Keystone Domain Setup
-
-**STATUS: PLACEHOLDER** — drafted post-deploy.
-
-## Purpose
-
-Run the magnum charm's `domain-setup` action to create the Keystone domain,
-trust role, and service user that Magnum requires for cluster operations.
-
-## Prerequisites
-
-- Magnum charm reached `active/idle` post Vault init
-- Keystone reachable from jumphost via FQDN
-
-## TODO
-
-- [ ] `juju run magnum/leader domain-setup --wait=10m`
-- [ ] Verify creation in Keystone:
-      `openstack domain show magnum`
-      `openstack user show magnum_domain_admin --domain magnum`
-- [ ] Acceptance: domain present, trust role assigned, charm in active/idle
diff --git a/runbooks/deprecated/04a-capi-bootstrap-cluster.md b/runbooks/deprecated/04a-capi-bootstrap-cluster.md
deleted file mode 100644
index d98f20c..0000000
--- a/runbooks/deprecated/04a-capi-bootstrap-cluster.md
+++ /dev/null
@@ -1,1056 +0,0 @@
-# Runbook 04a — CAPI bootstrap cluster
-
-**Status:** Executes after `02-deploy.md` (cloud up + all charms active/idle)
-and `03-vault-init.md` (Vault initialized + root CA available). Precedes
-`05-magnum-capi-driver.md` (driver graft consumes the workload kubeconfig
-produced here).
-
-**D-017 posture:** L3 full teardown and rebuild every deployment cycle.
-Nothing is preserved across cycles. capi-mgmt is wiped to MAAS Ready on
-teardown; rebuilt from scratch by this runbook.
-
-**Cross-references:**
-- D-017 (CAPI bootstrap cluster lifecycle)
-- D-007 (Magnum two-layer install)
-- D-002 (channel matrix — informs Vault CA chain)
-- Workstream 3b decision (2026-05-22): ship Vault CA (no tls-insecure); pivot mandatory
-
----
-
-## 1. Purpose & scope
-
-This runbook stands up the CAPI bootstrap cluster on `capi-mgmt.maas` and
-pivots cluster state into a self-managing workload cluster. Output:
-
-1. **Workload K8s cluster** (`capi-mgmt-cluster`) running in tenant VMs on
-   the cloud, self-managing post-pivot.
-2. **Workload kubeconfig** copied to jumphost at a known path. Consumed by
-   `runbooks/05-magnum-capi-driver.md` for the Magnum CAPI Helm driver
-   graft.
-3. **No remaining state** on the bootstrap k3s VM after pivot. capi-mgmt
-   becomes a disposable jump host.
-
-**Scope:** v1 testcloud. Roosevelt deltas in section 20.
-
-**Out of scope:**
-
-- Magnum-side configuration (runbook 05).
-- Workload cluster's tenant lifecycle (Magnum's job, not this runbook's).
-- Backup / DR for the workload cluster (Roosevelt concern).
-
----
-
-## 2. Decisions captured
-
-Per workstream 3b sign-off (2026-05-22):
-
-| Decision | Choice | Roosevelt parallel |
-|---|---|---|
-| Version pinning | Pin-at-execution with discovery in §4 | Same pattern; pins captured in deploy record |
-| Cloud TLS trust | Ship Vault CA to capi-mgmt + workload nodes (no `tls-insecure`) | Image-baked CA; CK8sConfig redundancy |
-| `clusterctl move` pivot | Mandatory; workload cluster becomes self-managing | Same |
-| K8s flavor | Canonical Kubernetes (CK8s) | Same |
-| OpenStack auth | v3applicationcredential | Same |
-| Pod CIDR | `10.244.0.0/16` | Same (does not conflict with cloud `10.12.0.0/16` or tenant pool `10.20.0.0/16`) |
-| Service CIDR | `10.96.0.0/12` | Same |
-| Workload cluster name | `capi-mgmt-cluster` | Same |
-| Workload node SSH user | `ubuntu` (MAAS/cloud-init convention) | Same |
-
-**Naming convention:**
-
-- Keystone project for CAPI: `capi-mgmt` (in `admin_domain`)
-- Keystone user for CAPI: `capo` (CAPO operator)
-- App credential: `capo-app-cred`
-- Workload image (Glance): `noble-amd64` (existing; do NOT duplicate as `ubuntu-24.04-capi` — Bobcat lesson)
-- Workload flavor: `capi-mgmt-node` (4 vCPU / 4 GiB / 30 GB) — control plane node sizing
-
----
-
-## 3. Prerequisites
-
-| Prereq | Verification |
-|---|---|
-| Cloud deployed; all charms `active/idle` per D-011 | `juju status --color\| grep -v "active.*idle"` returns only the header |
-| Vault initialized + unsealed | `juju ssh vault/leader -- sudo vault status` shows `Sealed=false` |
-| Vault root CA available on jumphost | `test -f $HOME/vault-pki/root-ca.pem && openssl x509 -in $HOME/vault-pki/root-ca.pem -noout -subject` |
-| Keystone reachable via FQDN | `curl -sf --cacert $HOME/vault-pki/root-ca.pem https://keystone.omega.dc0.vr0.cloud.neumatrix.local:5000/v3 \| jq .version.id` returns `"v3.14"` or current |
-| capi-mgmt VM exists in MAAS as Ready | `maas $MAAS_PROFILE machines read \| jq '.[] \| select(.hostname=="capi-mgmt") \| .status_name'` returns `"Ready"` |
-| Admin openrc available | `test -f $HOME/admin-openrc && source $HOME/admin-openrc && openstack token issue \| head -3` |
-| Workspace path under $HOME (snap confinement) | `WORK=$HOME/capi-bootstrap; mkdir -p "$WORK"; cd "$WORK"; pwd` shows under home |
-
-**Set shell context for the runbook:**
-
-```bash
-export REPO=$HOME/repos/openstack-caracal-ipv4   # adjust if your clone is elsewhere
-export WORK=$HOME/capi-bootstrap                  # runbook scratch dir
-export VAULT_CA=$HOME/vault-pki/root-ca.pem       # Vault root CA (from runbook 03)
-export CAPI_MGMT_METAL_IP=10.12.8.21              # capi-mgmt metal interface
-export CAPI_MGMT_PROVIDER_IP=10.12.4.21           # capi-mgmt provider interface
-export CLUSTER_NAME=capi-mgmt-cluster
-mkdir -p "$WORK"
-cd "$WORK"
-```
-
----
-
-## 4. Version discovery (set pins)
-
-Bobcat ran "dynamic latest." This runbook pins explicit versions captured at
-execution time, with the discovery procedure documented inline so each
-rebuild's pins are reproducible AND traceable.
-
-**GitHub API: authenticated vs unauthenticated.** Unauth has 60 req/hr;
-authenticated has 5000. For multiple rebuilds in a day, set a token:
-
-```bash
-# Optional but recommended — avoids rate-limit headaches during rebuild
-export GITHUB_TOKEN=<your-PAT-with-public_repo-read>
-# Or skip it: this section makes only 7 unauthenticated API calls, well under the 60/hr limit for a single run
-```
-
-**Discover current stable releases:**
-
-```bash
-cd "$WORK"
-
-# Helper: fetch latest stable release tag from a GitHub repo
-gh_latest() {
-  local repo=$1
-  local auth=""
-  [ -n "$GITHUB_TOKEN" ] && auth="-H Authorization: Bearer $GITHUB_TOKEN"
-  curl -sfL $auth "https://api.github.com/repos/$repo/releases/latest" \
-    | jq -r '.tag_name'
-}
-
-# Pin captures (one file per pin, for the deploy-record convention)
-mkdir -p pins
-gh_latest "kubernetes-sigs/cluster-api"                | tee pins/CAPI_VERSION
-gh_latest "kubernetes-sigs/cluster-api-provider-openstack" | tee pins/CAPO_VERSION
-gh_latest "canonical/cluster-api-k8s"                  | tee pins/CK8S_VERSION
-gh_latest "cert-manager/cert-manager"                  | tee pins/CERT_MANAGER_VERSION
-gh_latest "k-orc/openstack-resource-controller"        | tee pins/ORC_VERSION
-gh_latest "k3s-io/k3s"                                 | tee pins/K3S_VERSION
-gh_latest "helm/helm"                                  | tee pins/HELM_VERSION
-
-# Load into shell
-export CAPI_VERSION=$(cat pins/CAPI_VERSION)
-export CAPO_VERSION=$(cat pins/CAPO_VERSION)
-export CK8S_VERSION=$(cat pins/CK8S_VERSION)
-export CERT_MANAGER_VERSION=$(cat pins/CERT_MANAGER_VERSION)
-export ORC_VERSION=$(cat pins/ORC_VERSION)
-export K3S_VERSION=$(cat pins/K3S_VERSION)
-export HELM_VERSION=$(cat pins/HELM_VERSION)
-
-# Display for the deploy log
-cat pins/*_VERSION | paste -d= <(ls pins/) -
-```
-
-**Sanity check:** all values should look like `v1.X.Y` or `v0.X.Y`. If any
-returned `null` or empty, the GitHub API call failed — most likely
-rate-limited. Wait an hour or set `$GITHUB_TOKEN` and retry.
-
-**Capture pins as a deploy record (outside the repo):**
-
-The pin files in `$WORK/pins/` should be copied into a deploy-record directory
-(NOT committed to the repo — these are deploy-time captures). Suggested
-location: `$HOME/deploy-records/$(date +%Y%m%d-%H%M%S)/capi-pins/`.
-
-```bash
-DEPLOY_RECORD=$HOME/deploy-records/$(date +%Y%m%d-%H%M%S)/capi-pins
-mkdir -p "$DEPLOY_RECORD"
-cp pins/*_VERSION "$DEPLOY_RECORD/"
-ls -la "$DEPLOY_RECORD/"
-```
-
----
-
-## 5. MAAS-deploy capi-mgmt
-
-Prerequisite: capi-mgmt MAAS machine is in `Ready` state (see §3).
-Network config in MAAS:
-
-- **eth0** on metal fabric, DHCP → `10.12.8.21` (MAAS-pinned static lease)
-- **eth1** on provider fabric, static → `10.12.4.21`
-
-Deploy Ubuntu 24.04 (Noble):
-
-```bash
-# Get the capi-mgmt system_id from MAAS
-CAPI_MGMT_SYSTEM_ID=$(maas $MAAS_PROFILE machines read \
-  | jq -r '.[] | select(.hostname=="capi-mgmt") | .system_id')
-echo "capi-mgmt system_id: $CAPI_MGMT_SYSTEM_ID"
-
-# Deploy
-maas $MAAS_PROFILE machine deploy "$CAPI_MGMT_SYSTEM_ID" \
-  distro_series=noble \
-  hwe_kernel=ga-24.04
-```
-
-Poll for `Deployed`:
-
-```bash
-while true; do
-  STATUS=$(maas $MAAS_PROFILE machine read "$CAPI_MGMT_SYSTEM_ID" \
-    | jq -r '.status_name')
-  echo "$(date -Is) capi-mgmt status: $STATUS"
-  [ "$STATUS" = "Deployed" ] && break
-  [ "$STATUS" = "Failed deployment" ] && { echo "FAILED"; exit 1; }
-  sleep 30
-done
-```
-
-Typical deploy time: 5-8 minutes on this hardware.
-
-**SSH reachability:**
-
-```bash
-# MAAS .maas zone may not resolve from jumphost — use IP directly per handoff lessons
-ssh -o StrictHostKeyChecking=accept-new ubuntu@$CAPI_MGMT_METAL_IP -- hostname
-# Expect: capi-mgmt
-```
-
-> **Gotcha:** MAAS-deployed Ubuntu uses the `ubuntu` user, not `jessea123`.
-> See handoff "recurring technical pitfalls."
-
----
-
-## 6. SSH bootstrap + Vault CA install
-
-On the jumphost, prepare a transport bundle of essentials:
-
-```bash
-mkdir -p "$WORK/bootstrap-bundle"
-cp "$VAULT_CA" "$WORK/bootstrap-bundle/vault-ca.crt"
-chmod 644 "$WORK/bootstrap-bundle/vault-ca.crt"
-
-# Bundle pin files so capi-mgmt can read versions
-cp -r "$WORK/pins" "$WORK/bootstrap-bundle/"
-```
-
-SCP and install Vault CA on capi-mgmt:
-
-```bash
-scp -r "$WORK/bootstrap-bundle" ubuntu@$CAPI_MGMT_METAL_IP:/home/ubuntu/
-
-ssh ubuntu@$CAPI_MGMT_METAL_IP <<'EOF'
-set -euo pipefail
-
-# Install Vault CA as a system-trusted root
-sudo cp /home/ubuntu/bootstrap-bundle/vault-ca.crt /usr/local/share/ca-certificates/
-sudo update-ca-certificates 2>&1 | tail -3
-
-# Verify
-openssl s_client -connect keystone.omega.dc0.vr0.cloud.neumatrix.local:5000 \
-  -CApath /etc/ssl/certs -verify_return_error </dev/null 2>&1 \
-  | grep -E "(Verify return code|subject=)" || \
-  { echo "TLS chain verify failed against Keystone — investigate before proceeding"; exit 1; }
-
-# Update apt + base utilities
-sudo apt-get update -qq
-sudo apt-get install -y -qq jq curl yq
-
-# Confirm
-which jq curl yq
-EOF
-```
-
-**Expected:**
-
-- `update-ca-certificates` reports "1 added"
-- `openssl s_client` shows `Verify return code: 0 (ok)` and a Keystone cert
-  whose chain terminates at the Vault CA
-
-> **Why this matters:** Bobcat used `tls-insecure=true` in cloud.conf which
-> skipped this entire trust path. Our workstream 3b decision (ship Vault CA)
-> means OCCM and CAPO will validate certs against this trust store. If TLS
-> verify fails here, OCCM will crashloop later.
-
----
-
-## 7. k3s install
-
-On capi-mgmt:
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP "K3S_VERSION=$K3S_VERSION CAPI_MGMT_METAL_IP=$CAPI_MGMT_METAL_IP bash -s" <<'REMOTE_EOF'
-set -euo pipefail
-
-# Install k3s with explicit bind/advertise/SAN flags
-curl -sfL https://get.k3s.io | \
-  INSTALL_K3S_VERSION="$K3S_VERSION" \
-  sh -s - server \
-    --bind-address="$CAPI_MGMT_METAL_IP" \
-    --advertise-address="$CAPI_MGMT_METAL_IP" \
-    --node-ip="$CAPI_MGMT_METAL_IP" \
-    --tls-san="$CAPI_MGMT_METAL_IP" \
-    --tls-san=capi-mgmt.maas \
-    --write-kubeconfig-mode=0644 \
-    --disable=traefik
-
-# Wait for k3s API to respond
-for i in $(seq 1 30); do
-  if sudo kubectl get nodes 2>/dev/null | grep -q "Ready"; then
-    echo "k3s ready"; break
-  fi
-  echo "Waiting for k3s API... ($i/30)"
-  sleep 5
-done
-
-sudo kubectl get nodes
-sudo kubectl get pods -A
-REMOTE_EOF
-```
-
-> **Gotcha:** `--bind-address=$IP` makes k3s listen ONLY on that IP — not
-> also on 127.0.0.1. The default kubeconfig at
-> `/etc/rancher/k3s/k3s.yaml` has `server: https://127.0.0.1:6443` and will
-> NOT work as-is. Sed-rewrite below.
-
----
-
-## 8. Kubeconfig server-URL rewrite
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP "CAPI_MGMT_METAL_IP=$CAPI_MGMT_METAL_IP bash -s" <<'REMOTE_EOF'
-set -euo pipefail
-
-# Copy k3s kubeconfig to ubuntu user; rewrite server URL
-mkdir -p /home/ubuntu/.kube
-sudo cp /etc/rancher/k3s/k3s.yaml /home/ubuntu/.kube/config
-sudo chown ubuntu:ubuntu /home/ubuntu/.kube/config
-chmod 600 /home/ubuntu/.kube/config
-
-# Rewrite 127.0.0.1 → metal IP
-sed -i "s|server: https://127.0.0.1:6443|server: https://$CAPI_MGMT_METAL_IP:6443|" \
-  /home/ubuntu/.kube/config
-
-# Verify rewrite
-grep "server:" /home/ubuntu/.kube/config
-# Expect: server: https://10.12.8.21:6443
-
-# Confirm kubectl works as ubuntu user (no sudo)
-kubectl get nodes
-REMOTE_EOF
-```
-
----
-
-## 9. helm + clusterctl install
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP "HELM_VERSION=$HELM_VERSION CAPI_VERSION=$CAPI_VERSION bash -s" <<'REMOTE_EOF'
-set -euo pipefail
-
-# helm install (get-helm-3 fetches the version we specify)
-cd /tmp
-curl -sfL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \
-  | DESIRED_VERSION="$HELM_VERSION" bash
-helm version --short
-
-# clusterctl install
-CLUSTERCTL_URL="https://github.com/kubernetes-sigs/cluster-api/releases/download/${CAPI_VERSION}/clusterctl-linux-amd64"
-sudo curl -sfL "$CLUSTERCTL_URL" -o /usr/local/bin/clusterctl
-sudo chmod +x /usr/local/bin/clusterctl
-clusterctl version
-REMOTE_EOF
-```
-
----
-
-## 10. clusterctl init (CAPI controllers + cert-manager + ORC + CAPO + CK8s)
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP "CAPI_VERSION=$CAPI_VERSION CK8S_VERSION=$CK8S_VERSION CERT_MANAGER_VERSION=$CERT_MANAGER_VERSION ORC_VERSION=$ORC_VERSION CAPO_VERSION=$CAPO_VERSION bash -s" <<'REMOTE_EOF'
-set -euo pipefail
-# Every pin referenced below must be passed on the ssh command line above: set -u aborts the script on any that is missing
-# Configure clusterctl with provider URLs
-mkdir -p ~/.cluster-api
-cat > ~/.cluster-api/clusterctl.yaml <<EOF
-providers:
-  - name: "canonical-kubernetes"
-    url: "https://github.com/canonical/cluster-api-k8s/releases/${CK8S_VERSION}/bootstrap-components.yaml"
-    type: "BootstrapProvider"
-  - name: "canonical-kubernetes"
-    url: "https://github.com/canonical/cluster-api-k8s/releases/${CK8S_VERSION}/control-plane-components.yaml"
-    type: "ControlPlaneProvider"
-EOF
-
-# Initialize CAPI with explicit versions
-clusterctl init \
-  --core "cluster-api:${CAPI_VERSION}" \
-  --infrastructure "openstack:${CAPO_VERSION}" \
-  --bootstrap "canonical-kubernetes:${CK8S_VERSION}" \
-  --control-plane "canonical-kubernetes:${CK8S_VERSION}" \
-  --cert-manager-version "${CERT_MANAGER_VERSION}"
-
-# Wait for controllers to be Ready
-kubectl wait --for=condition=Available --timeout=5m \
-  deployment --all -n capi-system
-kubectl wait --for=condition=Available --timeout=5m \
-  deployment --all -n capi-kubeadm-bootstrap-system 2>/dev/null || true
-kubectl wait --for=condition=Available --timeout=5m \
-  deployment --all -n capo-system
-kubectl wait --for=condition=Available --timeout=5m \
-  deployment --all -n cert-manager
-
-# Install ORC from the pinned release (GitHub serves release assets under /releases/download/<tag>/; the manifest asset is install.yaml)
-kubectl apply -f "https://github.com/k-orc/openstack-resource-controller/releases/download/${ORC_VERSION}/install.yaml"
-kubectl wait --for=condition=Available --timeout=5m \
-  deployment --all -n orc-system
-
-# Confirm all controllers; `|| true` because the last grep exits 1 when nothing is left to print (the healthy case), which set -e/pipefail would otherwise report as a failed step
-kubectl get pods -A | grep -v "Running\|Completed" | grep -v NAME || true
-# Expected: empty output (all pods Running or no abnormal state)
-REMOTE_EOF
-```
-
-> **Gotcha:** the actual namespace names (`capi-system`, `capo-system`, etc.)
-> are conventions. If a controller fails to land in the expected namespace,
-> `kubectl get deployment -A` lists all deployments — diagnose from there.
-
----
-
-## 11. Cloud-side prep (Keystone, Nova, Glance)
-
-Back on the jumphost:
-
-```bash
-source $HOME/admin-openrc
-
-# Inventory existing resources FIRST (Bobcat lesson: don't create duplicates)
-echo "=== Existing images ==="
-openstack image list -c ID -c Name -f json | jq -r '.[] | "\(.Name)\t\(.ID)"'
-echo ""
-echo "=== Existing flavors ==="
-openstack flavor list -c Name -c ID -c RAM -c VCPUs -c Disk -f json \
-  | jq -r '.[] | "\(.Name)\tRAM=\(.RAM)\tCPU=\(.VCPUs)\tDisk=\(.Disk)\tID=\(.ID)"'
-echo ""
-echo "=== Existing keypairs ==="
-openstack keypair list
-echo ""
-echo "=== Existing projects in admin_domain ==="
-openstack project list --domain admin_domain
-```
-
-**Create / verify resources:**
-
-```bash
-# Keystone project + user
-openstack project show capi-mgmt --domain admin_domain 2>/dev/null \
-  || openstack project create capi-mgmt --domain admin_domain --description "CAPI management plane"
-
-openstack user show capo --domain admin_domain 2>/dev/null \
-  || openstack user create capo --domain admin_domain --password-prompt --description "CAPO operator"
-
-# Role assignments (CAPO needs member + load-balancer_member at minimum;
-# admin works for testcloud — Roosevelt should use least-privilege)
-openstack role add --user capo --user-domain admin_domain \
-  --project capi-mgmt --project-domain admin_domain \
-  member
-
-openstack role add --user capo --user-domain admin_domain \
-  --project capi-mgmt --project-domain admin_domain \
-  load-balancer_member 2>/dev/null || \
-  echo "(load-balancer_member role may not exist if Octavia not deployed yet)"
-
-# Application credential — captured to file under $HOME (snap confinement)
-APP_CRED_FILE=$WORK/capo-app-cred.json
-openstack --os-username capo --os-user-domain-name admin_domain \
-          --os-project-name capi-mgmt --os-project-domain-name admin_domain \
-  application credential create capo-app-cred \
-  --description "CAPO operator app credential" \
-  -f json > "$APP_CRED_FILE"
-chmod 600 "$APP_CRED_FILE"
-
-# Extract credential ID + secret
-export APP_CRED_ID=$(jq -r '.id' "$APP_CRED_FILE")
-export APP_CRED_SECRET=$(jq -r '.secret' "$APP_CRED_FILE")
-echo "App cred ID: $APP_CRED_ID"
-```
-
-**Nova keypair (workload node SSH key):**
-
-```bash
-# Generate fresh keypair locally (do NOT reuse jumphost personal key)
-ssh-keygen -t ed25519 -N '' -f "$WORK/capi-workload-key" \
-  -C "capi-workload-$(date +%Y%m%d)"
-chmod 600 "$WORK/capi-workload-key"
-
-# Upload the public key to Nova as a keypair (keypairs are a Nova resource, not Keystone)
-openstack keypair create --public-key "$WORK/capi-workload-key.pub" capi-workload-key
-openstack keypair show capi-workload-key
-```
-
-**Workload image:**
-
-```bash
-# Inventory check — use noble-amd64 if it exists (Bobcat lesson: do NOT create ubuntu-24.04-capi as a dup)
-NOBLE_IMAGE_ID=$(openstack image show noble-amd64 -c id -f value 2>/dev/null || echo "")
-
-if [ -z "$NOBLE_IMAGE_ID" ]; then
-  echo "noble-amd64 image not found — upload required."
-  echo "(Pull from https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img"
-  echo " then: openstack image create --disk-format qcow2 --container-format bare \\"
-  echo "        --public --file noble-server-cloudimg-amd64.img noble-amd64)"
-  exit 1
-fi
-echo "Using image: noble-amd64 ($NOBLE_IMAGE_ID)"
-export WORKLOAD_IMAGE_ID=$NOBLE_IMAGE_ID
-```
-
-**Workload flavor:**
-
-```bash
-openstack flavor show capi-mgmt-node 2>/dev/null \
-  || openstack flavor create capi-mgmt-node \
-       --vcpus 4 --ram 4096 --disk 30 \
-       --description "CAPI workload node (control plane sizing)"
-
-export WORKLOAD_FLAVOR=capi-mgmt-node
-```
-
----
-
-## 12. clouds.yaml + cloud.conf composition (with Vault CA, no tls-insecure)
-
-The workload cluster's OCCM (OpenStack Cloud Controller Manager) and CAPO both
-need to call OpenStack APIs. Two files:
-
-- `clouds.yaml` — CAPO's view of how to reach OpenStack (used at cluster
-  creation time on capi-mgmt)
-- `cloud.conf` — OCCM's view, injected into the workload cluster's k8s
-  Secret (used continuously by OCCM running in the workload cluster)
-
-**Compose clouds.yaml:**
-
-```bash
-cat > "$WORK/clouds.yaml" <<EOF
-clouds:
-  capi-mgmt:
-    region_name: RegionOne
-    interface: public
-    identity_api_version: 3
-    auth_type: v3applicationcredential
-    auth:
-      auth_url: https://keystone.omega.dc0.vr0.cloud.neumatrix.local:5000/v3
-      application_credential_id: $APP_CRED_ID
-      application_credential_secret: $APP_CRED_SECRET
-    cacert: /usr/local/share/ca-certificates/vault-ca.crt
-    verify: true
-EOF
-chmod 600 "$WORK/clouds.yaml"
-
-# base64-encode for cluster template embedding (no newline wrapping)
-base64 -w0 "$WORK/clouds.yaml" > "$WORK/clouds.yaml.b64"
-```
-
-**Compose cloud.conf** (INI format, NOT YAML):
-
-```bash
-cat > "$WORK/cloud.conf" <<EOF
-[Global]
-auth-url=https://keystone.omega.dc0.vr0.cloud.neumatrix.local:5000/v3
-application-credential-id=$APP_CRED_ID
-application-credential-secret=$APP_CRED_SECRET
-region=RegionOne
-domain-name=admin_domain
-ca-file=/usr/local/share/ca-certificates/vault-ca.crt
-
-[LoadBalancer]
-use-octavia=true
-EOF
-chmod 600 "$WORK/cloud.conf"
-
-base64 -w0 "$WORK/cloud.conf" > "$WORK/cloud.conf.b64"
-```
-
-> **Critical delta from Bobcat:** the `ca-file` line replaces `tls-insecure=true`.
-> The path `/usr/local/share/ca-certificates/vault-ca.crt` exists on capi-mgmt
-> (from §6) AND will be injected into workload nodes via CK8sConfig in §13.
-
-**base64-encode Vault CA for CK8sConfig injection:**
-
-```bash
-base64 -w0 "$VAULT_CA" > "$WORK/vault-ca.crt.b64"
-wc -c "$WORK/vault-ca.crt.b64"
-```
-
----
-
-## 13. Cluster template rendering (with Vault CA injection)
-
-The cluster template defines:
-
-- Cluster object
-- OpenStackCluster (CAPO infrastructure)
-- CK8sControlPlane + OpenStackMachineTemplate (control plane)
-- Control-plane bootstrap config, inline under `CK8sControlPlane.spec.spec` (includes Vault CA injection)
-- MachineDeployment + OpenStackMachineTemplate + CK8sConfigTemplate (workers — includes Vault CA injection)
-- One Secret carrying clouds.yaml, cloud.conf, and the Vault CA (`cacert` key)
-
-Variables (18 total):
-
-```bash
-export CLUSTER_NAME=capi-mgmt-cluster
-export CLUSTER_NAMESPACE=default
-export KUBERNETES_VERSION=v1.31.4              # adjust to CK8s-supported
-export CONTROL_PLANE_MACHINE_COUNT=1           # 3 for HA on Roosevelt
-export WORKER_MACHINE_COUNT=2                  # 3 on Roosevelt
-export OPENSTACK_DNS_NAMESERVERS=10.12.4.227   # designate VIP
-export OPENSTACK_FAILURE_DOMAIN=nova
-export OPENSTACK_EXTERNAL_NETWORK_ID=$(openstack network show ext_net -c id -f value)
-export OPENSTACK_IMAGE_NAME=noble-amd64
-export OPENSTACK_FLAVOR=capi-mgmt-node
-export OPENSTACK_SSH_KEY_NAME=capi-workload-key
-export POD_CIDR=10.244.0.0/16
-export SERVICE_CIDR=10.96.0.0/12
-export CLOUDS_YAML_B64=$(cat "$WORK/clouds.yaml.b64")
-export CLOUD_CONF_B64=$(cat "$WORK/cloud.conf.b64")
-export VAULT_CA_B64=$(cat "$WORK/vault-ca.crt.b64")
-export CLUSTER_DOMAIN=cluster.local
-export OPENSTACK_CLOUD=capi-mgmt
-
-# Sanity print
-env | grep -E "^(CLUSTER|KUBERNETES|CONTROL_PLANE|WORKER|OPENSTACK|POD|SERVICE|VAULT|CLOUD)" \
-  | grep -v "B64\|SECRET\|PASS" | sort
-```
-
-**Render the cluster template:**
-
-```bash
-cat > "$WORK/cluster-template.yaml" <<'TEMPLATE_EOF'
-apiVersion: v1
-kind: Secret
-metadata:
-  name: ${CLUSTER_NAME}-cloud-config
-  namespace: ${CLUSTER_NAMESPACE}
-type: Opaque
-data:
-  clouds.yaml: ${CLOUDS_YAML_B64}
-  cloud.conf: ${CLOUD_CONF_B64}
-  cacert: ${VAULT_CA_B64}
----
-apiVersion: cluster.x-k8s.io/v1beta1
-kind: Cluster
-metadata:
-  name: ${CLUSTER_NAME}
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  clusterNetwork:
-    pods:
-      cidrBlocks:
-        - ${POD_CIDR}
-    services:
-      cidrBlocks:
-        - ${SERVICE_CIDR}
-    serviceDomain: ${CLUSTER_DOMAIN}
-  infrastructureRef:
-    apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-    kind: OpenStackCluster
-    name: ${CLUSTER_NAME}
-  controlPlaneRef:
-    apiVersion: controlplane.cluster.x-k8s.io/v1beta2
-    kind: CK8sControlPlane
-    name: ${CLUSTER_NAME}-control-plane
----
-apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-kind: OpenStackCluster
-metadata:
-  name: ${CLUSTER_NAME}
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  identityRef:
-    name: ${CLUSTER_NAME}-cloud-config
-    cloudName: ${OPENSTACK_CLOUD}
-  externalNetwork:
-    id: ${OPENSTACK_EXTERNAL_NETWORK_ID}
-  managedSecurityGroups:
-    allowAllInClusterTraffic: true
-  apiServerLoadBalancer:
-    enabled: true
----
-apiVersion: controlplane.cluster.x-k8s.io/v1beta2
-kind: CK8sControlPlane
-metadata:
-  name: ${CLUSTER_NAME}-control-plane
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  replicas: ${CONTROL_PLANE_MACHINE_COUNT}
-  version: ${KUBERNETES_VERSION}
-  machineTemplate:
-    infrastructureTemplate:
-      apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-      kind: OpenStackMachineTemplate
-      name: ${CLUSTER_NAME}-control-plane
-  spec:
-    files:
-      - path: /usr/local/share/ca-certificates/vault-ca.crt
-        owner: root:root
-        permissions: "0644"
-        contentFrom:
-          secret:
-            name: ${CLUSTER_NAME}-cloud-config
-            key: cacert
-    preRunCommands:
-      - update-ca-certificates
-    extraKubeAPIServerArgs:
-      "--cloud-provider": external
----
-apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-kind: OpenStackMachineTemplate
-metadata:
-  name: ${CLUSTER_NAME}-control-plane
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  template:
-    spec:
-      flavor: ${OPENSTACK_FLAVOR}
-      image:
-        filter:
-          name: ${OPENSTACK_IMAGE_NAME}
-      sshKeyName: ${OPENSTACK_SSH_KEY_NAME}
-      identityRef:
-        name: ${CLUSTER_NAME}-cloud-config
-        cloudName: ${OPENSTACK_CLOUD}
----
-apiVersion: cluster.x-k8s.io/v1beta1
-kind: MachineDeployment
-metadata:
-  name: ${CLUSTER_NAME}-md-0
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  clusterName: ${CLUSTER_NAME}
-  replicas: ${WORKER_MACHINE_COUNT}
-  selector:
-    matchLabels: {}
-  template:
-    spec:
-      clusterName: ${CLUSTER_NAME}
-      version: ${KUBERNETES_VERSION}
-      bootstrap:
-        configRef:
-          apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
-          kind: CK8sConfigTemplate
-          name: ${CLUSTER_NAME}-md-0
-      infrastructureRef:
-        apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-        kind: OpenStackMachineTemplate
-        name: ${CLUSTER_NAME}-md-0
----
-apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-kind: OpenStackMachineTemplate
-metadata:
-  name: ${CLUSTER_NAME}-md-0
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  template:
-    spec:
-      flavor: ${OPENSTACK_FLAVOR}
-      image:
-        filter:
-          name: ${OPENSTACK_IMAGE_NAME}
-      sshKeyName: ${OPENSTACK_SSH_KEY_NAME}
-      identityRef:
-        name: ${CLUSTER_NAME}-cloud-config
-        cloudName: ${OPENSTACK_CLOUD}
----
-apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
-kind: CK8sConfigTemplate
-metadata:
-  name: ${CLUSTER_NAME}-md-0
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  template:
-    spec:
-      files:
-        - path: /usr/local/share/ca-certificates/vault-ca.crt
-          owner: root:root
-          permissions: "0644"
-          contentFrom:
-            secret:
-              name: ${CLUSTER_NAME}-cloud-config
-              key: cacert
-      preRunCommands:
-        - update-ca-certificates
-TEMPLATE_EOF
-
-# envsubst to render
-envsubst < "$WORK/cluster-template.yaml" > "$WORK/cluster-rendered.yaml"
-
-# Validate as YAML
-python3 -c "import yaml; list(yaml.safe_load_all(open('$WORK/cluster-rendered.yaml'))); print('YAML OK')"
-
-# Quick visual check — no leftover ${...} markers
-grep -n '\${' "$WORK/cluster-rendered.yaml" || echo "No unsubstituted variables — good"
-```
-
-> **CK8sConfig field name caveat:** the exact field names (`files`,
-> `preRunCommands`) and their `contentFrom.secret` schema are CK8s-version-
-> dependent. If `clusterctl init` failed earlier with schema warnings,
-> consult the CK8s release notes for the pinned `$CK8S_VERSION`.
-
----
-
-## 14. Apply + poll-to-Ready
-
-Transfer rendered template to capi-mgmt and apply:
-
-```bash
-scp "$WORK/cluster-rendered.yaml" ubuntu@$CAPI_MGMT_METAL_IP:/home/ubuntu/cluster.yaml
-
-ssh ubuntu@$CAPI_MGMT_METAL_IP <<'EOF'
-set -euo pipefail
-kubectl apply -f /home/ubuntu/cluster.yaml
-echo "Applied. Waiting for cluster Available status (15-min timeout)..."
-# Poll the Cluster's Ready condition every 10 s, up to 90 times (15 min)
-for i in $(seq 1 90); do
-  STATUS=$(kubectl get cluster capi-mgmt-cluster -o json 2>/dev/null \
-    | jq -r '.status.phase // "Unknown"')
-  READY=$(kubectl get cluster capi-mgmt-cluster -o json 2>/dev/null \
-    | jq -r '.status.conditions[]? | select(.type=="Ready") | .status' \
-    | head -1)
-  echo "$(date -Is) phase=$STATUS ready=$READY"
-  [ "$READY" = "True" ] && { echo "Cluster Ready"; break; }
-  sleep 10
-done
-# The control plane here is CK8sControlPlane; the kubeadm provider (and its CRD) is never installed, so asking for kubeadmcontrolplane would error out under set -e
-kubectl get cluster,machines,ck8scontrolplane,machinedeployment -A
-EOF
-```
-
-**If the poll times out before Ready,** typical diagnosis:
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl describe cluster capi-mgmt-cluster
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get machines -A
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl logs -n capo-system deployment/capo-controller-manager --tail=100
-```
-
-Common causes:
-
-- OpenStack API unreachable from capi-mgmt → check Vault CA install on capi-mgmt (§6)
-- Image / flavor / network ID wrong in cluster template → re-check the §11 resources and the §13 variables
-- Security group rules block kube-api LB → CAPO usually handles this; check OpenStackCluster status
-- Application credential expired / wrong → re-check `$APP_CRED_ID`
-
----
-
-## 15. Extract workload kubeconfig
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- clusterctl get kubeconfig capi-mgmt-cluster \
-  > "$WORK/capi-mgmt-cluster.kubeconfig"
-chmod 600 "$WORK/capi-mgmt-cluster.kubeconfig"
-
-# Sanity-check the workload cluster is reachable
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get nodes
-# Expect: 1 control plane + 2 workers, all Ready
-```
-
-If `get nodes` times out, the cluster's API LB may not have allocated its
-external IP yet, or the firewall rules don't permit jumphost → workload API:
-
-```bash
-# What IP is the cluster's API LB on?
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get openstackcluster capi-mgmt-cluster \
-  -o json | jq '.status.externalNetwork, .status.controlPlaneEndpoint'
-
-# Test reachability
-curl -sk --max-time 10 "https://<API-IP>:6443/version" && echo " ← reachable" || echo "API LB unreachable"
-```
-
----
-
-## 16. `clusterctl init` on target (workload cluster)
-
-The workload cluster must have the same CAPI providers installed before `move`.
-
-```bash
-# Run from jumphost using the workload kubeconfig
-KUBECONFIG="$WORK/capi-mgmt-cluster.kubeconfig" clusterctl init \
-  --core "cluster-api:${CAPI_VERSION}" \
-  --infrastructure "openstack:${CAPO_VERSION}" \
-  --bootstrap "canonical-kubernetes:${CK8S_VERSION}" \
-  --control-plane "canonical-kubernetes:${CK8S_VERSION}" \
-  --cert-manager-version "${CERT_MANAGER_VERSION}"
-
-# ORC into workload cluster too
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" apply \
-  -f "https://github.com/k-orc/openstack-resource-controller/releases/${ORC_VERSION}/orc.yaml"
-
-# Wait for everything Available
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" wait \
-  --for=condition=Available --timeout=5m \
-  deployment --all -n capi-system
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" wait \
-  --for=condition=Available --timeout=5m \
-  deployment --all -n capo-system
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" wait \
-  --for=condition=Available --timeout=5m \
-  deployment --all -n cert-manager
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" wait \
-  --for=condition=Available --timeout=5m \
-  deployment --all -n orc-system
-```
-
-> **cert-manager double-install caveat:** if CK8s already installed
-> cert-manager during workload bootstrap, the second `clusterctl init` may
-> warn or skip. Check existing cert-manager version against `$CERT_MANAGER_VERSION`
-> — if they differ, version-skew issues may surface post-pivot. Adjust the
-> pin in §4 or accept the existing version. Roosevelt's standard practice
-> is to install cert-manager via `clusterctl init` only (don't pre-install
-> via CK8s) — same approach valid here if you want clean version control.
-
----
-
-## 17. `clusterctl move` pivot
-
-Move all CAPI CRs from bootstrap k3s → workload cluster:
-
-```bash
-# Stage the target kubeconfig on capi-mgmt (where clusterctl move runs)
-scp "$WORK/capi-mgmt-cluster.kubeconfig" ubuntu@$CAPI_MGMT_METAL_IP:/home/ubuntu/target.kubeconfig
-
-# Dry-run first to catch issues before commit
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- clusterctl move \
-  --to-kubeconfig=/home/ubuntu/target.kubeconfig \
-  --dry-run
-
-# Inspect dry-run output: list of objects to be moved. Should include:
-#   - Cluster, OpenStackCluster
-#   - Secrets (cloud-config)
-#   - Machine objects, OpenStackMachineTemplate
-#   - CK8sControlPlane, CK8sConfigTemplate
-#   - MachineDeployment
-# Should NOT include cert-manager state (cert-manager manages its own state
-# on each cluster independently)
-```
-
-**If dry-run looks correct, execute the move:**
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- clusterctl move \
-  --to-kubeconfig=/home/ubuntu/target.kubeconfig
-
-# Move can take several minutes. Output ends with: "moved successfully"
-```
-
----
-
-## 18. Post-pivot verification
-
-```bash
-echo "=== Bootstrap k3s (should now be empty of cluster CRs) ==="
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get cluster -A
-# Expect: No resources found (or only a header)
-
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get machines -A
-# Expect: No resources found
-
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get openstackcluster -A
-# Expect: No resources found
-
-echo ""
-echo "=== Workload cluster (should now own its own cluster CRs) ==="
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get cluster -A
-# Expect: capi-mgmt-cluster shown, phase=Provisioned
-
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get machines -A
-# Expect: 3 machines (1 control-plane + 2 workers), all Running
-
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get openstackcluster -A
-
-echo ""
-echo "=== CAPI controllers in workload ==="
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get pods -A \
-  | grep -E "(capi|capo|orc|cert-manager)" | grep -v "Running\|Completed"
-# Expect: empty (all controller pods Running)
-
-echo ""
-echo "=== OCCM not crash-looping (CRITICAL — main goal of TLS-verify work) ==="
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get pods -n kube-system \
-  -l k8s-app=openstack-cloud-controller-manager
-# Expect: 1 pod Running, NOT CrashLoopBackOff
-
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" logs -n kube-system \
-  -l k8s-app=openstack-cloud-controller-manager --tail=50 \
-  | grep -iE "(tls|cert|error)" | head -20
-# Expect: no TLS/cert errors; OCCM should be healthy
-```
-
-> **If OCCM crash-loops with "x509: certificate signed by unknown authority":**
-> Vault CA distribution failed. Check (a) `/usr/local/share/ca-certificates/vault-ca.crt`
-> exists on workload nodes; (b) `update-ca-certificates` ran (check `/etc/ssl/certs/ca-certificates.crt`
-> for the Vault CA's subject); (c) the secret reference in CK8sConfigTemplate
-> matched the secret name. SSH into a worker via the jumphost key (`ssh -i
-> $WORK/capi-workload-key ubuntu@<worker-IP-via-FIP>`) to diagnose.
-
----
-
-## 19. Handoff to runbook 05
-
-The workload kubeconfig at `$WORK/capi-mgmt-cluster.kubeconfig` is the input to
-`runbooks/05-magnum-capi-driver.md`. Copy it to a stable path:
-
-```bash
-mkdir -p $HOME/magnum-capi
-cp "$WORK/capi-mgmt-cluster.kubeconfig" $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig
-chmod 600 $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig
-echo "Workload kubeconfig staged at: $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig"
-```
-
-> **Important — post-pivot semantic shift from Bobcat:** Magnum's
-> `kubeconfig_file` setting (under `[capi_helm]` in
-> `/etc/magnum/magnum.conf.d/99-capi.conf`, per D-007) now points to the
-> workload cluster, not the bootstrap k3s. Bobcat had Magnum pointing at
-> bootstrap k3s because the pivot was never executed. With pivot mandatory,
-> Magnum's CAPI calls flow:
->
-> ```
-> Magnum/leader → workload cluster API → CAPI controllers (running in workload)
->                                       → create new Cluster CRs (tenant Magnum clusters)
-> ```
->
-> The bootstrap k3s on capi-mgmt is now disposable. If you wanted, you could
-> destroy capi-mgmt entirely at this point — the workload cluster manages
-> itself. (Roosevelt may actually do this for cost savings.) For v1 testcloud,
-> leave capi-mgmt running so its k3s can be inspected for diagnostics.
-
----
-
-## 20. Roosevelt deltas (forward-look)
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Workload image | Default `noble-amd64` from cloud-images.ubuntu.com | Custom image baked with Vault CA pre-installed (no runtime install step) |
-| Vault CA distribution | CK8sConfig `files:` + `preRunCommands:` (this runbook) | Image-baked + CK8sConfig (defense in depth) |
-| App credential lifetime | No expiry set (testcloud) | Short-lived rotating credentials via Vault auth method |
-| Workload cluster control plane | 1 node | 3 nodes (HA) |
-| Workload cluster workers | 2 nodes | Per-tenant sizing; HPA-driven |
-| `clusterctl init --cert-manager-version` | Pin from §4 | Pin to Vault PKI cert-manager profile (separate Roosevelt prep) |
-| capi-mgmt VM lifecycle post-pivot | Kept running for diagnostics | Destroyed (cost savings; pivot makes it disposable) |
-| Version pinning record | `$HOME/deploy-records/<timestamp>/capi-pins/` | Same pattern, captured in Vault as audit artifact |
-| Authentication to GitHub API | Optional PAT | Mandatory PAT (avoid rate-limit during automated rebuilds) |
-
----
-
-## 21. Rotation/refresh of pins
-
-The pins captured in §4 will age. Recommended cadence:
-
-- **Per rebuild:** re-discover all pins (Step 1 of next execution will catch
-  natural drift).
-- **Out-of-band patch:** if a CVE drops for any pinned component, run §4
-  discovery alone and capture the new pin into `$DEPLOY_RECORD/`. Then for
-  the affected component only, follow the upgrade procedure from its
-  upstream docs (does NOT necessarily require this whole runbook re-run).
-
-For Roosevelt, this becomes a tracked maintenance window task.
-
----
-
-## 22. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-22 | Document created. Vault CA distribution (no tls-insecure), mandatory `clusterctl move` pivot, pin-at-execution version model. | Workstream 3b |
diff --git a/runbooks/deprecated/05-magnum-capi-driver.md b/runbooks/deprecated/05-magnum-capi-driver.md
deleted file mode 100644
index e1414e1..0000000
--- a/runbooks/deprecated/05-magnum-capi-driver.md
+++ /dev/null
@@ -1,529 +0,0 @@
-# Runbook 05 — Magnum CAPI Helm driver install
-
-**Status:** Executes after `04-magnum-domain.md` (Keystone wiring) and
-`04a-capi-bootstrap-cluster.md` (workload cluster + kubeconfig staged).
-Final post-deploy step to make Magnum capable of creating CAPI-managed
-tenant K8s clusters.
-
-**Cross-references:**
-- D-007 Layer B (Magnum two-layer install)
-- D-017 (CAPI bootstrap cluster lifecycle)
-- Runbook 04a §19 (workload kubeconfig handoff)
-- Workstream 3c decision (2026-05-22): magnum-capi-helm 1.1.0 from PyPI; workload-cluster kubeconfig (NOT bootstrap k3s)
-
-**Known doc inconsistency (tracked for cleanup):**
-D-007's Layer B currently states the kubeconfig points at "capi-mgmt.maas
-bootstrap k3s". That language is correct for Bobcat (no pivot) but obsolete
-post-workstream-3b (pivot mandatory). This runbook uses the workload cluster
-kubeconfig as the canonical target. D-007 patch to follow in a workstream-3
-cleanup commit.
-
----
-
-## 1. Purpose & scope
-
-Graft the CAPI Helm driver onto the Charmed Magnum deployment so that
-`openstack coe cluster create` provisions tenant K8s clusters via CAPI (in
-the workload cluster) instead of via the deprecated Heat driver.
-
-**Output of this runbook:**
-
-- `magnum-capi-helm==1.1.0` installed on the magnum unit's system Python.
-- `/etc/magnum/kubeconfig` populated with the workload cluster's
-  kubeconfig (post-pivot CAPI controller plane).
-- `/etc/magnum/magnum.conf.d/99-capi.conf` configured with
-  `enabled_drivers = k8s_capi_helm_v1` and `[capi_helm] kubeconfig_file=`.
-- Systemd overrides on `magnum-api` and `magnum-conductor` that replace
-  the init.d wrapper's ExecStart with explicit `--config-dir` invocation.
-- Both services running cleanly with the CAPI driver loaded.
-
-**Scope:** v1 testcloud. Roosevelt deltas in §12.
-
-**Out of scope:**
-- Magnum domain setup (runbook 04)
-- Workload cluster lifecycle (runbook 04a)
-- Smoketest tenant cluster creation is OPTIONAL (§11) — full validation
-  framework belongs in runbook 08.
-
----
-
-## 2. Decisions captured
-
-| Decision | Choice | Reason |
-|---|---|---|
-| Driver pin | `magnum-capi-helm==1.1.0` from PyPI | D-007 correction (stackhpc fork archived Dec 2024; canonical project on opendev/PyPI; 1.1.0 is last Caracal-cycle release) |
-| Install method | `pip3 install --break-system-packages` | PEP 668 — Ubuntu 22.04+ requires explicit override for system-site-packages install |
-| Install scope | System Python on magnum unit (not venv) | Magnum charm uses system-packaged python at `/usr/lib/python3/dist-packages/magnum/`; driver must import from same site |
-| Kubeconfig target | Workload cluster (post-pivot) | Workstream 3b — bootstrap k3s is empty post-pivot; CAPI controllers live in workload |
-| Kubeconfig source | `$HOME/magnum-capi/capi-mgmt-cluster.kubeconfig` (staged by 04a §19) | Documented handoff |
-| Driver entry-point name | `k8s_capi_helm_v1` | Per upstream magnum-capi-helm 1.1.0; verify in §10 |
-| Conf.d filename | `99-capi.conf` | Numeric prefix ensures it loads AFTER any charm-managed conf, so `enabled_drivers` override wins |
-| File encoding | ASCII-only | Non-ASCII in conf.d causes silent magnum daemon failures (handoff lesson; cf. Horizon `local_settings.d` issue) |
-| Trustee credential | Existing magnum-shared user (charm-managed) | Roosevelt will use app-credential pattern |
-
----
-
-## 3. Prerequisites
-
-| Prereq | Verification |
-|---|---|
-| Magnum charm active/idle | `juju status magnum \| grep magnum/0` shows `active idle` |
-| Magnum domain setup completed (runbook 04) | `openstack domain show magnum \| grep enabled` returns `True` |
-| Workload cluster reachable from jumphost | `kubectl --kubeconfig $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig get nodes` returns Ready nodes |
-| CAPI controllers running in workload cluster | `kubectl --kubeconfig $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig get pods -n capi-system \| grep -v Running \| grep -v NAME` empty |
-| Workload kubeconfig staged at expected path | `test -r $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig && stat -c %a $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig` shows `600` |
-| `juju exec` works to magnum/leader (use exec, NOT ssh, for non-interactive — handoff lesson) | `juju exec --unit magnum/leader -- hostname` returns the unit hostname |
-
-**Set shell context:**
-
-```bash
-export WORK=$HOME/magnum-capi
-export WORKLOAD_KUBECONFIG=$WORK/capi-mgmt-cluster.kubeconfig
-export DRIVER_VERSION=magnum-capi-helm==1.1.0   # per D-007 correction
-cd "$WORK"
-```
-
-> **`juju ssh` vs `juju exec` choice:** the handoff lessons explicitly call
-> out that `juju ssh` hangs when stdout is redirected (PTY allocation issue).
-> This runbook uses `juju exec` for all non-interactive command execution and
-> reserves `juju ssh` only for cases where you actually want an interactive
-> shell.
-
----
-
-## 4. Pre-flight: capture current state
-
-Capture the magnum unit's state BEFORE making changes. Useful for diagnosis
-if anything goes wrong, and as a record of what was changed.
-
-```bash
-mkdir -p "$WORK/pre-state"
-
-# Service unit files (as managed by charm)
-juju exec --unit magnum/leader -- \
-  'sudo systemctl cat magnum-api magnum-conductor 2>&1' \
-  > "$WORK/pre-state/systemd-units.txt"
-
-# Currently-enabled drivers
-juju exec --unit magnum/leader -- \
-  'sudo grep -r enabled_drivers /etc/magnum/ 2>/dev/null || echo "(no enabled_drivers found — charm default applies)"' \
-  > "$WORK/pre-state/drivers-pre.txt"
-
-# Python site-packages — see what's already installed
-juju exec --unit magnum/leader -- \
-  'sudo pip3 list 2>/dev/null | grep -iE "magnum|cluster|helm|kubernetes" || true' \
-  > "$WORK/pre-state/pip-pre.txt"
-
-# conf.d state
-juju exec --unit magnum/leader -- \
-  'sudo ls -la /etc/magnum/magnum.conf.d/ 2>/dev/null || echo "(no conf.d directory)"' \
-  > "$WORK/pre-state/confd-pre.txt"
-
-# Service running state
-juju exec --unit magnum/leader -- \
-  'sudo systemctl is-active magnum-api magnum-conductor' \
-  > "$WORK/pre-state/service-state-pre.txt"
-
-# Display the captured state
-cat "$WORK/pre-state/"*.txt
-```
-
-> **What to look for in pre-state:** the charm-managed `enabled_drivers` value
-> probably includes Heat-based drivers (`heat_kubernetes`, etc.). The 99-capi.conf
-> override in §7 replaces this with the single CAPI driver. The pre-state
-> capture documents what was active before the override took effect.
-
----
-
-## 5. Install magnum-capi-helm 1.1.0
-
-```bash
-juju exec --unit magnum/leader -- \
-  "sudo pip3 install $DRIVER_VERSION --break-system-packages"
-```
-
-**Verify install:**
-
-```bash
-juju exec --unit magnum/leader -- \
-  'sudo pip3 show magnum-capi-helm | head -10'
-# Expect: Name: magnum-capi-helm
-#         Version: 1.1.0
-#         Location: /usr/lib/python3/dist-packages
-
-juju exec --unit magnum/leader -- \
-  'sudo python3 -c "import magnum_capi_helm; print(magnum_capi_helm.__file__)"'
-# Expect: /usr/lib/python3/dist-packages/magnum_capi_helm/__init__.py
-```
-
-**Check that the driver entry point is registered:**
-
-```bash
-juju exec --unit magnum/leader -- \
-  'sudo python3 -c "
-from stevedore import driver
-mgr = driver.DriverManager(
-    namespace=\"magnum.drivers\",
-    name=\"k8s_capi_helm_v1\",
-    invoke_on_load=False
-)
-print(\"Driver class:\", mgr.driver)
-"'
-# Expect: Driver class: <class 'magnum_capi_helm.driver.Driver'>
-# (or similar — the actual class path is package-version-dependent)
-```
-
-> If the entry point check fails with "No 'k8s_capi_helm_v1' driver found",
-> the driver name in 1.1.0 may differ from what D-007 documented. Inspect the
-> installed package's `entry_points.txt`:
->
-> ```bash
-> juju exec --unit magnum/leader -- \
->   'sudo cat /usr/lib/python3/dist-packages/magnum_capi_helm*.dist-info/entry_points.txt 2>/dev/null'
-> ```
->
-> Find the entry under `[magnum.drivers]` — use that exact name in §7.
-
----
-
-## 6. Stage workload kubeconfig on magnum unit
-
-```bash
-# Transfer kubeconfig from jumphost to magnum unit
-juju scp "$WORKLOAD_KUBECONFIG" magnum/leader:/tmp/kubeconfig
-
-# Install with correct ownership/mode in one atomic step
-juju exec --unit magnum/leader -- \
-  'sudo install -m 0640 -o root -g magnum /tmp/kubeconfig /etc/magnum/kubeconfig && sudo rm /tmp/kubeconfig'
-```
-
-**Verify:**
-
-```bash
-juju exec --unit magnum/leader -- \
-  'sudo ls -la /etc/magnum/kubeconfig'
-# Expect: -rw-r----- 1 root magnum ... /etc/magnum/kubeconfig
-
-# Confirm magnum user can read it
-juju exec --unit magnum/leader -- \
-  'sudo -u magnum cat /etc/magnum/kubeconfig | head -3'
-# Expect: apiVersion: v1 / clusters: / - cluster:
-
-# Confirm kubectl can use it from the magnum unit (sanity check on API reachability)
-juju exec --unit magnum/leader -- \
-  'sudo -u magnum kubectl --kubeconfig /etc/magnum/kubeconfig get nodes 2>&1 | head -10'
-# Expect: NAME ... STATUS=Ready for control plane + workers
-# OR: kubectl not installed (acceptable — magnum-capi-helm uses Python client, not kubectl)
-```
-
-> **Why mode 0640 and group magnum:** kubeconfig contains auth tokens. Mode
-> 0600 (owner-only) wouldn't let the `magnum` system user (which runs
-> magnum-api/conductor) read it. Mode 0640 with `group: magnum` is the
-> minimum-permission setup that works. NOT 0644 — keeps it off other users
-> on the unit.
-
----
-
-## 7. Configure `/etc/magnum/magnum.conf.d/99-capi.conf`
-
-Generate the conf locally first (snap confinement does not apply to plain
-bash on jumphost, but we keep paths under `$HOME` for consistency), then
-transfer.
-
-**ASCII-only verification is critical** — the handoff documents non-ASCII
-characters in `conf.d` files causing silent daemon failures (cf. Horizon
-`local_settings.d`). Use plain straight quotes, ASCII dashes, no smart
-typography.
-
-```bash
-# Write locally
-cat > "$WORK/99-capi.conf" <<'EOF'
-[DEFAULT]
-enabled_drivers = k8s_capi_helm_v1
-
-[capi_helm]
-kubeconfig_file = /etc/magnum/kubeconfig
-EOF
-
-# Verify it is pure ASCII (no UTF-8 sneakers)
-file "$WORK/99-capi.conf"
-# Expect: ASCII text
-# If it says "UTF-8 Unicode text", STOP and rewrite by hand — even one stray
-# em-dash or smart quote will silently break magnum
-
-# Hex dump check (paranoid mode)
-xxd "$WORK/99-capi.conf" | grep -v "^[0-9a-f]*: [0-9a-f ]*  [a-zA-Z0-9 \[\]=._/]*$" | head -5
-# Expect: empty output (all bytes are printable ASCII)
-```
-
-**Stage and install:**
-
-```bash
-juju scp "$WORK/99-capi.conf" magnum/leader:/tmp/99-capi.conf
-
-juju exec --unit magnum/leader -- \
-  'sudo mkdir -p /etc/magnum/magnum.conf.d && sudo install -m 0644 -o root -g root /tmp/99-capi.conf /etc/magnum/magnum.conf.d/99-capi.conf && sudo rm /tmp/99-capi.conf'
-
-# Verify
-juju exec --unit magnum/leader -- \
-  'sudo ls -la /etc/magnum/magnum.conf.d/ && sudo cat /etc/magnum/magnum.conf.d/99-capi.conf'
-# Expect: file listed; content matches what was written
-```
-
----
-
-## 8. Systemd override on magnum-api + magnum-conductor
-
-The Charmed Magnum unit files use a wrapper pattern:
-
-```
-ExecStart=/etc/init.d/magnum-api systemd-start
-```
-
-The wrapper does NOT pass `--config-dir` to magnum-api, so `/etc/magnum/magnum.conf.d/`
-is never loaded. The 99-capi.conf would have no effect.
-
-Override with explicit `--config-file` + `--config-dir` invocation.
-
-**Generate override files locally:**
-
-```bash
-cat > "$WORK/magnum-api-override.conf" <<'EOF'
-[Service]
-ExecStart=
-ExecStart=/usr/bin/magnum-api --config-file=/etc/magnum/magnum.conf --config-dir=/etc/magnum/magnum.conf.d
-EOF
-
-cat > "$WORK/magnum-conductor-override.conf" <<'EOF'
-[Service]
-ExecStart=
-ExecStart=/usr/bin/magnum-conductor --config-file=/etc/magnum/magnum.conf --config-dir=/etc/magnum/magnum.conf.d
-EOF
-
-# ASCII check
-file "$WORK/magnum-api-override.conf" "$WORK/magnum-conductor-override.conf"
-# Expect: ASCII text x2
-```
-
-> **The empty `ExecStart=` line is critical.** Systemd accumulates ExecStart
-> directives by default; an empty assignment is required to CLEAR the inherited
-> directive before setting the replacement. Without the empty line, the unit
-> would have BOTH the init.d wrapper AND the new direct invocation, and would
-> likely fail to start.
-
-**Install on the unit:**
-
-```bash
-juju scp "$WORK/magnum-api-override.conf" magnum/leader:/tmp/magnum-api-override.conf
-juju scp "$WORK/magnum-conductor-override.conf" magnum/leader:/tmp/magnum-conductor-override.conf
-
-juju exec --unit magnum/leader -- \
-  'sudo mkdir -p /etc/systemd/system/magnum-api.service.d /etc/systemd/system/magnum-conductor.service.d && \
-   sudo install -m 0644 -o root -g root /tmp/magnum-api-override.conf /etc/systemd/system/magnum-api.service.d/override.conf && \
-   sudo install -m 0644 -o root -g root /tmp/magnum-conductor-override.conf /etc/systemd/system/magnum-conductor.service.d/override.conf && \
-   sudo rm /tmp/magnum-api-override.conf /tmp/magnum-conductor-override.conf'
-
-# Reload systemd to pick up the overrides
-juju exec --unit magnum/leader -- 'sudo systemctl daemon-reload'
-
-# Verify the overrides are effective (systemctl cat shows combined unit + overrides)
-juju exec --unit magnum/leader -- 'sudo systemctl cat magnum-api | grep -A1 ExecStart'
-# Expect: TWO ExecStart= lines — the empty clear-line and the new /usr/bin/magnum-api invocation
-juju exec --unit magnum/leader -- 'sudo systemctl cat magnum-conductor | grep -A1 ExecStart'
-# Expect: TWO ExecStart= lines as above for magnum-conductor
-```
-
-> **Charm reconciliation note:** the Magnum charm may rewrite its own systemd
-> units on config changes or upgrades. The drop-in override at
-> `/etc/systemd/system/magnum-api.service.d/override.conf` is OUTSIDE the
-> charm's writable zone and should survive. Verify after any `juju refresh` or
-> `juju config magnum` command by re-running the `systemctl cat` check above.
-
----
-
-## 9. Restart services + verify health
-
-```bash
-juju exec --unit magnum/leader -- \
-  'sudo systemctl restart magnum-api magnum-conductor'
-
-# Wait briefly for services to initialize
-sleep 5
-
-# Check active state
-juju exec --unit magnum/leader -- \
-  'sudo systemctl is-active magnum-api magnum-conductor'
-# Expect: active (x2)
-
-# Examine recent journal for errors (the critical step — magnum's silent failure
-# mode means we must read logs, not just trust is-active)
-juju exec --unit magnum/leader -- \
-  'sudo journalctl -u magnum-api --since "2 minutes ago" --no-pager | tail -50'
-juju exec --unit magnum/leader -- \
-  'sudo journalctl -u magnum-conductor --since "2 minutes ago" --no-pager | tail -50'
-```
-
-**Look for these red flags in the logs:**
-
-| Symptom | Likely cause | Remediation |
-|---|---|---|
-| `ImportError: No module named magnum_capi_helm` | §5 pip install failed | Re-run §5; check pip3 output |
-| `EntryPointError: No 'k8s_capi_helm_v1' driver` | Driver entry-point name mismatch | Verify name per §5 footnote; update §7 |
-| Service repeatedly restarts (look for "Started" appearing twice in 10s) | Likely a config error in 99-capi.conf | Re-check ASCII-only; check magnum.conf.d permissions |
-| `kubeconfig_file` not honored | --config-dir not being passed | §8 override not active; re-run `systemctl daemon-reload` |
-| Silent: no error but driver also not loading | Non-ASCII char snuck into a conf | `file /etc/magnum/magnum.conf.d/99-capi.conf` — if it says UTF-8, regenerate |
-
----
-
-## 10. CAPI driver enablement check
-
-Verify the driver is actually loaded by Magnum and reachable via the API.
-
-```bash
-source $HOME/admin-openrc
-
-# List supported COE drivers via the Magnum API
-openstack coe cluster template list -f json
-# (empty templates list is fine — we are checking the endpoint responds)
-
-# Direct check on the unit: scan the service's loaded drivers
-juju exec --unit magnum/leader -- \
-  'sudo journalctl -u magnum-conductor --since "5 minutes ago" --no-pager | grep -iE "driver|enabled" | head -20'
-# Expect: a line mentioning k8s_capi_helm_v1 having been loaded
-# (Magnum logs the loaded drivers at startup)
-
-# Definitive check: try creating a cluster template that requires the CAPI driver
-openstack coe cluster template create magnum-capi-driver-check \
-  --image noble-amd64 \
-  --keypair capi-workload-key \
-  --external-network ext_net \
-  --master-flavor capi-mgmt-node \
-  --flavor capi-mgmt-node \
-  --coe kubernetes \
-  --network-driver calico \
-  --labels kube_tag=v1.31.4
-
-openstack coe cluster template show magnum-capi-driver-check -c name -c coe -c labels
-```
-
-> **If template create fails with "driver not enabled" or similar:** the
-> Magnum API process is not loading the conf.d. Verify the systemd override
-> took effect — `sudo systemctl show magnum-api -p ExecStart` on the unit
-> should show the explicit `--config-dir` invocation. If it still shows the
-> init.d wrapper, the daemon-reload + restart did not pick up the override.
-
-**Cleanup the driver-check template:**
-
-```bash
-openstack coe cluster template delete magnum-capi-driver-check
-```
-
----
-
-## 11. Optional smoketest — create a tenant CAPI cluster
-
-This step is **optional**. Full validation belongs in runbook 08. Use this
-smoketest only if you want immediate confirmation that the entire chain
-(Magnum API -> conductor -> magnum-capi-helm -> CAPI controllers in workload
-cluster -> tenant K8s cluster on tenant VMs) works end-to-end.
-
-```bash
-# Create a cluster template tuned for testcloud smoketest
-openstack coe cluster template create magnum-smoketest-template \
-  --image noble-amd64 \
-  --keypair capi-workload-key \
-  --external-network ext_net \
-  --master-flavor capi-mgmt-node \
-  --flavor capi-mgmt-node \
-  --coe kubernetes \
-  --network-driver calico \
-  --labels boot_volume_size=20,kube_tag=v1.31.4,octavia_provider=ovn
-
-# Create a 1+1 cluster (minimum for smoketest)
-openstack coe cluster create magnum-smoketest \
-  --cluster-template magnum-smoketest-template \
-  --master-count 1 \
-  --node-count 1
-
-# Poll for status (15-20 min typical; CAPI provisions tenant VMs end-to-end)
-for i in $(seq 1 60); do
-  STATUS=$(openstack coe cluster show magnum-smoketest -c status -f value)
-  echo "$(date -Is) status=$STATUS"
-  case "$STATUS" in
-    CREATE_COMPLETE) echo "Smoketest passed"; break ;;
-    CREATE_FAILED)   echo "Smoketest FAILED"; openstack coe cluster show magnum-smoketest; exit 1 ;;
-  esac
-  sleep 30
-done
-
-# Retrieve the smoketest cluster's kubeconfig
-openstack coe cluster config magnum-smoketest --dir "$WORK/smoketest-kubeconfig"
-
-# Sanity-check the smoketest cluster
-KUBECONFIG="$WORK/smoketest-kubeconfig/config" kubectl get nodes
-KUBECONFIG="$WORK/smoketest-kubeconfig/config" kubectl get pods -A | head -20
-
-# Cleanup the smoketest cluster
-openstack coe cluster delete magnum-smoketest
-openstack coe cluster template delete magnum-smoketest-template
-```
-
-> **What success looks like:** the CAPI controllers in the workload cluster
-> receive the new Cluster CR (created by magnum-capi-helm in response to the
-> Magnum API call), CAPO talks to OpenStack to provision tenant VMs, the
-> tenant VMs join the new K8s cluster, and the new cluster has 1 control
-> plane + 1 worker Ready. Octavia provides the API server LB (visible as a
-> Floating IP in the tenant project).
-
----
-
-## 12. Roosevelt deltas (forward-look)
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Driver pin source | PyPI `magnum-capi-helm==1.1.0` | Internal mirror with checksum verification |
-| Driver pin record | Implicit in this runbook | Captured in Vault as audit artifact alongside CAPI pins |
-| Kubeconfig source | Workload cluster (post-pivot per 04a §17) | Same |
-| Kubeconfig rotation | Manual on capi-mgmt rebuild | Automated when workload cluster cert rotates |
-| Trustee credential | Charm-default magnum-shared user | Per-tenant app credentials via Vault auth method |
-| Magnum HA | num_units=1 (per D-009 testcloud) | num_units=3 with hacluster + provider VIP |
-| Driver upgrade discipline | Manual re-run of §5 | Tracked maintenance window; Vault audit log |
-| Systemd override | Drop-in at `/etc/systemd/system/magnum-*.service.d/override.conf` | Same — but provided via a charm overlay package, not manual file install |
-| ASCII-only enforcement | Manual check (§7, §8) | Pre-flight lint in `scripts/pre-flight-checks.sh` |
-
----
-
-## 13. Documented runtime gotchas (carry-forward from handoff)
-
-These gotchas burned cycles during the Bobcat Magnum CAPI work. Each is
-explicitly handled in this runbook; collecting them here for visibility:
-
-1. **PEP 668 `--break-system-packages`** (§5). Ubuntu 22.04+ refuses
-   `pip install` against system Python by default. The flag is required for
-   the magnum-capi-helm install path used by Charmed Magnum.
-2. **`juju ssh` hangs on stdout redirect.** PTY allocation issue.
-   This runbook uses `juju exec` for all non-interactive command execution.
-3. **Heredoc nesting in `juju ssh` is fragile.** This runbook writes
-   conf files locally first and uses `juju scp` + `juju exec install` to
-   transfer — single-level only.
-4. **Non-ASCII characters in `conf.d` files cause silent daemon failures.**
-   §7 and §8 both include `file <path>` ASCII verification before transfer.
-5. **`openstack -f value -c X -c Y` outputs in alphabetical field order,
-   not flag order.** This runbook uses single-column queries or `-f json |
-   jq` throughout.
-6. **Charm-managed `enabled_drivers` is overridden, not appended.** The
-   `enabled_drivers = k8s_capi_helm_v1` line in 99-capi.conf REPLACES the
-   charm-default value (which would include the deprecated Heat drivers).
-7. **The systemd override empty `ExecStart=` line is required** to clear
-   the inherited ExecStart before setting the replacement (§8).
-8. **Snap-confined `openstack` CLI cannot read `/tmp`.** This runbook stages
-   files under `$WORK=$HOME/magnum-capi`. The smoketest in §11 also writes
-   to `$WORK/smoketest-kubeconfig`.
-
----
-
-## 14. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-22 | Document created. magnum-capi-helm 1.1.0 from PyPI; workload-cluster kubeconfig (post-pivot per workstream 3b); systemd override pattern; ASCII-only conf.d. | Workstream 3c |
diff --git a/runbooks/deprecated/06-tenant-setup.md b/runbooks/deprecated/06-tenant-setup.md
deleted file mode 100644
index 3915229..0000000
--- a/runbooks/deprecated/06-tenant-setup.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# Runbook 06 — Tenant Resource Recreation
-
-**STATUS: PLACEHOLDER** — drafted post-deploy.
-
-## Purpose
-
-Recreate the standard testcloud tenant resources (domain, project, user,
-networks, images, keypairs, flavors) using a proper IPAM-aligned design
-per D-010 + D-016 (not the ad-hoc `user1` pattern from the original test
-cloud).
-
-## Prerequisites
-
-- Cloud fully deployed and validated
-- DNS zones populated (Runbook 07 may precede this if Designate-via-tenant
-  DNS is in scope at tenant create time)
-- NetBox IPv4 tenant pool prefix present (per D-016; default `10.20.0.0/16`)
-
-## TODO
-
-- [ ] Create domain `domain1`
-- [ ] Create project `project1` in domain `domain1`
-- [ ] Create user `user1` in project1 (member role + load-balancer_member
-      role for Octavia)
-- [ ] Tenant network with CIDR carved from NetBox IPv4 tenant pool
-      - Suggested convention: `10.20.<project-index>.0/24` per D-016
-      - project1 → `10.20.1.0/24`
-      - Per D-016 hybrid model, the per-project /24 is Neutron-managed and
-        NOT added back to NetBox
-- [ ] Tenant router connected to ext_net (Provider 10.12.4.0/22)
-- [ ] Glance image: noble-amd64 (cloud-init enabled)
-- [ ] Flavor m1.small (1 vCPU, 2 GiB RAM, 20 GiB root)
-- [ ] Keypair for user1
-- [ ] openrc files: `~/admin-openrc`, `~/user1-openrc`
-- [ ] Application credentials for user1 (audit trail)
-- [ ] Take second KVM snapshot (per D-012 Snapshot 2)
-
-## v1 vs. v2 note
-
-In v1, tenant networks are IPv4-only. v2 adds IPv6 tenant subnets carved
-from the v2 IPv6 tenant pool (currently reservation status in NetBox).
diff --git a/runbooks/deprecated/07-dns-zones.md b/runbooks/deprecated/07-dns-zones.md
deleted file mode 100644
index 3b780de..0000000
--- a/runbooks/deprecated/07-dns-zones.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Runbook 07 — Designate Zones and Records (v1: A records only)
-
-**STATUS: PLACEHOLDER** — drafted post-deploy.
-
-## Purpose
-
-Create the cloud's DNS zones in Designate, populate API VIP A records
-(v1: IPv4 only), and configure Neutron defaults to push Designate as
-tenant DNS resolver.
-
-## Prerequisites
-
-- Designate charm in `active/idle`
-- Keystone, Neutron API reachable
-- API VIP hostnames already in `/etc/hosts` on all OpenStack nodes
-  (per D-008 Layer 0 bootstrap)
-
-## TODO
-
-- [ ] Create primary zone:
-      `openstack zone create --email admin@neumatrix.local \
-       omega.dc0.vr0.cloud.neumatrix.local.`
-- [ ] Populate API VIP **A** records for each public service:
-      - keystone, glance, nova, neutron, cinder, placement, octavia,
-        barbican, magnum, horizon, designate
-      - **v1: A records only** (IPv4 VIPs from the Provider API VIP range
-        10.12.4.224-.254)
-      - **v2 will add AAAA records when IPv6 Provider VIPs become active**
-- [ ] Configure Neutron defaults:
-      `juju config neutron-api default-dns-domain=omega.dc0.vr0.cloud.neumatrix.local.`
-      `juju config neutron-api dns-domain=omega.dc0.vr0.cloud.neumatrix.local.`
-- [ ] Configure Neutron DHCP to push Designate as resolver:
-      `juju config neutron-api dns-servers=<designate-vip-ipv4>`
-- [ ] Verify from a test tenant VM:
-      `nslookup keystone.omega.dc0.vr0.cloud.neumatrix.local`
-      resolves to Provider API VIP
diff --git a/runbooks/deprecated/08-validate.md b/runbooks/deprecated/08-validate.md
deleted file mode 100644
index 7f7acfc..0000000
--- a/runbooks/deprecated/08-validate.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Runbook 08 — Validation (Roosevelt-Rehearsal Bar)
-
-**STATUS: PLACEHOLDER** — drafted with scripts/validate.sh.
-
-## Purpose
-
-Execute the validation criteria from D-011 and confirm the cloud is ready to
-be considered a successful rebuild.
-
-## Prerequisites
-
-- All prior runbooks complete
-
-## Validation criteria (per D-011)
-
-- [ ] All charms `active/idle` in `juju status`
-- [ ] All public API VIPs respond on FQDN from jumphost
-- [ ] All public API VIPs respond on FQDN from a tenant VM (Option B path)
-- [ ] Octavia LB pattern passes: create LB, two members, round-robin verified,
-      failover verified, recovery verified
-- [ ] Magnum CAPI cluster create end-to-end: cluster template + cluster create,
-      OCCM does not crash-loop, cluster reaches CREATE_COMPLETE
-- [ ] Vault unseal + auto-unseal-after-reboot pattern: reboot vault unit,
-      confirm auto-unseal via etcd (or manual unseal per HA pattern)
-- [ ] Designate resolves API hostnames from tenant subnet
-- [ ] Snapshot 1 (post-deploy, pre-tenant) taken (per D-012)
-- [ ] Snapshot 2 (post-tenant) taken (per D-012)
-
-## TODO
-
-- [ ] Run `scripts/validate.sh` and capture output
-- [ ] Document any divergences from validation criteria in
-      `docs/design-decisions.md` change log
diff --git a/runbooks/deprecated/README.md b/runbooks/deprecated/README.md
deleted file mode 100644
index b884f0e..0000000
--- a/runbooks/deprecated/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Deprecated v1 Runbooks
-
-The runbooks in this directory have been superseded by the
-`runbooks/v1-do-doc-NN-*.md` execution documents (or, in the case of
-`07-dns-zones.md`, deferred to v2 entirely per D-019).
-
-They are preserved here so the audit trail from the early v1 drafting
-phase remains accessible. **Do not execute them.** The v1 deploy is
-gated through the do-document set.
-
-## Replacement map
-
-| Deprecated runbook | Replacement |
-|---|---|
-| `00-pre-deploy.md` | superseded by D-017 + D-018 (no per-cycle backups; direct MAAS teardown); `v1-do-doc-01-prep.md` covers prep |
-| `01a-octavia-pki-generation.md` | `v1-do-doc-02-pki.md` |
-| `02-deploy.md` | `v1-do-doc-04-deploy.md` |
-| `03-vault-init.md` | `v1-do-doc-05-vault-init.md` |
-| `04-magnum-domain.md` | `v1-do-doc-06-magnum-domain.md` |
-| `04a-capi-bootstrap-cluster.md` | `v1-do-doc-07-capi-bootstrap.md` |
-| `05-magnum-capi-driver.md` | `v1-do-doc-08-magnum-driver.md` |
-| `06-tenant-setup.md` | `v1-do-doc-09-tenant.md` |
-| `07-dns-zones.md` | **deferred to v2 per D-019** (no v1 replacement) |
-| `08-validate.md` | `v1-do-doc-10-validate.md` |
-
-`01-destroy-model.md` is **not** in this directory - it remains active in
-`runbooks/` and is referenced as a conditional sub-procedure by
-`v1-do-doc-03-destroy.md`.
\ No newline at end of file
diff --git a/runbooks/phase-00-teardown-maas-reset.md b/runbooks/phase-00-teardown-maas-reset.md
new file mode 100644
index 0000000..94bf9ba
--- /dev/null
+++ b/runbooks/phase-00-teardown-maas-reset.md
@@ -0,0 +1,243 @@
+# Phase 00 -- Teardown + MAAS Reset
+
+Destroy the `openstack` Juju model and reset the four MAAS hosts to a clean,
+deploy-ready state: OSD secondary disks wiped, storage-class NICs linked, and the
+MAAS VIP/FIP address carve in place. This is the rebuild-prep window -- it runs
+BEFORE phase-01, because the VIP block must be MAAS-reserved before the bundle
+deploys onto it, and `link-subnet` only works on a Ready (not Deployed) machine.
+
+Decisions: D-018 (skip graceful; MAAS-release-direct; supersedes D-013), D-017
+(full rebuild every cycle, nothing preserved), KI-P3-001 (the VIP carve fix).
+Troubleshooting: appendix-A -- DOCFIX-016 (never `maas list` -- API-key leak),
+DOCFIX-017 (no `maas whoami`; hardcode the eyeballed system_ids), R7 (sudo for
+libvirt/qemu-img), KI-P3-001.
+
+!!! DESTRUCTIVE. Phase 1 (destroy-model + release) and Phase 2 (OSD wipe) are
+    irreversible short of the KVM snapshots (the D-017 safety net). Each destructive
+    step is DISCRETE and individually gated -- do not batch.
+
+CAPI-MGMT NOTE: this teardown releases the FOUR openstack hosts only. The MAAS
+`capi-mgmt` VM is the RETIRED D-033 out-of-cloud node; the in-cloud `capi-mgmt-v2`
+tenant VM (phase-06) replaces it. Leave `capi-mgmt` Ready (its separate Phase-7
+teardown is out of scope here). (The older 01-destroy-model.md released 5 VMs incl.
+capi-mgmt -- that was the D-033 era; do NOT release it on the current rebuild.)
+
+---
+
+## Prerequisites
+- KVM snapshots of openstack0-3 exist (safety net). Authenticated juju session
+  (`juju whoami`). MAAS CLI logged in as profile `admin`.
+- Run from jumphost `vopenstack-jesse` (user `jessea123`, sudo; also the libvirt hypervisor).
+
+## Constants and env-literals
+- MAAS profile: `admin` (DOCFIX-016: NEVER `maas list` -- it prints the API key).
+- system_ids (hardcode; DOCFIX-017, no `maas whoami`): openstack0=`4na83t`,
+  openstack1=`qdbqd6`, openstack2=`h8frng`, openstack3=`tmsafc`.
+- MAAS subnet ids: 1=provider 10.12.4.0/22, 2=metal 10.12.8.0/22, 6=data 10.12.12.0/22,
+  7=storage 10.12.16.0/22, 8=replication 10.12.20.0/22.
+- per-host storage NIC octet = 40 + index: data 10.12.12.4N, storage 10.12.16.4N, replication 10.12.20.4N.
+
+## Run-location legend
+- `# RUN: jumphost` -- `juju` + `maas admin`; the jumphost is also the libvirt hypervisor (sudo).
+
+---
+
+## Phase 0 -- Pre-flight (READ-ONLY; run before teardown)
+`# RUN: jumphost`
+```bash
+( {
+  echo "=== 0a. five network spaces (hard blocker if absent) ==="
+  juju spaces   # expect metal 10.12.8.0/22 | provider 10.12.4.0/22 | data 10.12.12.0/22 | storage 10.12.16.0/22 | replication 10.12.20.0/22
+
+  echo "=== 0b. VIP ipranges (note the front-loaded ones to KEEP + the stale .224-.254 to remove) ==="
+  maas admin ipranges read \
+    | jq -r '.[] | "id=\(.id)\ttype=\(.type)\t\(.start_ip)-\(.end_ip)\tsubnet=\(.subnet.cidr // "?")\t\(.comment // "")"' | sort
+  #   KEEP: provider 10.12.4.2-.63, metal 10.12.8.2-.63 (bundle VIPs live here), provider FIP 10.12.5.0-10.12.7.254.
+  #   STALE: metal 10.12.8.224-.254 (old scheme) -> its id feeds Phase 4 (this arc: id=2).
+
+  echo "=== 0c. storage-class NIC link state on all four hosts (drives Phase 3) ==="
+  for SID in 4na83t qdbqd6 h8frng tmsafc; do echo "  -- $SID --"
+    maas admin interfaces read "$SID" | jq -r '.[] | select(.name|test("^enp(8|9|10)s0$"))
+      | "    \(.name)\tid=\(.id)\tlinks=\([.links[]?|{(.subnet.cidr):.ip_address}])"'
+  done   # enp8s0(data) is the one KNOWN unlinked + a HARD deploy prereq; enp9s0/enp10s0 usually already linked
+} )
+```
+```bash
+# 0d. OSD-wipe pre-flight gate -- post-teardown these are "shut off"; vdb is root:root / 600. (R7: sudo)
+for host in openstack0 openstack1 openstack2 openstack3; do
+  f="/var/lib/libvirt/images/${host}-1.qcow2"
+  printf '%-46s state=%s owner=%s mode=%s\n' "$f" \
+    "$(sudo virsh -c qemu:///system domstate "$host" 2>/dev/null)" \
+    "$(sudo stat -c '%U:%G' "$f" 2>/dev/null)" "$(sudo stat -c '%a' "$f" 2>/dev/null)"
+done   # expect (AFTER Phase 1 release): 4 lines, state=shut off, owner=root:root, mode=600
+```
+
+## Phase 1 -- Teardown (D-018)  DISCRETE / DESTRUCTIVE
+`# RUN: jumphost`
+```bash
+# A. pre-destroy capture (reference only; NOT for restore)
+TS=$(date -u +%Y%m%dT%H%M%SZ); BACKUP_DIR=$HOME/backups/pre-caracal-destroy-$TS; mkdir -p "$BACKUP_DIR"
+juju export-bundle > "$BACKUP_DIR/bundle-pre-destroy.yaml"
+juju status --format=yaml > "$BACKUP_DIR/juju-status-pre-destroy.yaml"
+for f in "$BACKUP_DIR"/*.yaml; do [ -s "$f" ] || echo "WARNING: $f empty"; done
+echo "$BACKUP_DIR" > "$HOME/.last-pre-caracal-destroy-backup"; ls -la "$BACKUP_DIR"
+```
+```bash
+# B. destroy the openstack model (returns ~1-2 min; reaping ~5-10 min background). Controller untouched.
+juju destroy-model openstack --force --no-wait --destroy-storage --no-prompt
+```
+```bash
+# C. release the FOUR openstack hosts by system_id (DOCFIX-017: hardcoded ids, no whoami). NOT capi-mgmt.
+for SID in 4na83t qdbqd6 h8frng tmsafc; do
+  echo "Releasing $SID..."; maas admin machine release "$SID" comment="Caracal rebuild teardown $TS"
+done
+```
+```bash
+# D. verify
+juju models   # expect: no 'openstack' (allow a few min)
+maas admin machines read \
+  | jq -r '.[] | select(.hostname|test("^openstack[0-3]$")) | "\(.hostname)\t\(.status_name)"' | sort
+  # expect four lines, each ending "Ready"
+```
+GATE: `juju models` shows no `openstack`; openstack0-3 all Ready. (`link-subnet` is
+REJECTED on a Deployed machine -- Phases 2-3 REQUIRE Ready.) If the model is still
+`destroying` after ~10 min: `juju machines -m openstack --format=yaml`, then
+`juju remove-machine -m openstack --force <id>` for each lingering id, then re-run the
+destroy-model in B.
+
+## Phase 2 -- OSD secondary-disk wipe (clean-slate Ceph)  DISCRETE / DESTRUCTIVE
+`# RUN: jumphost (libvirt host; R7 sudo)`  Only after Phase 0d, RE-RUN after the Phase 1
+release, is GREEN (all "shut off") AND explicit go. vda (the OS disk) is NOT touched --
+MAAS reinstalls it on deploy; only vdb (the OSD target) is recreated blank.
+```bash
+OWNER="root:root"; MODE="600"
+for host in openstack0 openstack1 openstack2 openstack3; do
+  f="/var/lib/libvirt/images/${host}-1.qcow2"
+  echo "=== Wiping $f ==="
+  sudo rm -f "$f"
+  sudo qemu-img create -f qcow2 "$f" 512G
+  sudo chown "$OWNER" "$f"; sudo chmod "$MODE" "$f"
+  sudo ls -lh "$f"
+done
+# verify
+for host in openstack0 openstack1 openstack2 openstack3; do
+  sudo qemu-img info "/var/lib/libvirt/images/${host}-1.qcow2" | grep -E 'virtual size|disk size'
+done
+```
+GATE: 4 files, ~200 KiB actual / 512 GiB virtual, root:root mode 600.
+
+## Phase 3 -- Storage-class NIC links (idempotent; machines Ready)
+`# RUN: jumphost`  Links every storage-class NIC to its space's subnet. enp8s0 (data)
+is the one KNOWN unlinked and a HARD deploy prereq (nova-compute:neutron-plugin->data,
+octavia:ovsdb-cms->data, chassis data bindings). enp9s0/enp10s0 back the C2 Ceph
+public/cluster bindings; this links them too only if not already linked.
+```bash
+declare -A NIC_CIDR=( [enp8s0]=10.12.12.0/22 [enp9s0]=10.12.16.0/22 [enp10s0]=10.12.20.0/22 )
+declare -A HOST_OCTET=( [4na83t]=40 [qdbqd6]=41 [h8frng]=42 [tmsafc]=43 )
+declare -A HN=( [4na83t]=openstack0 [qdbqd6]=openstack1 [h8frng]=openstack2 [tmsafc]=openstack3 )
+
+for SID in 4na83t qdbqd6 h8frng tmsafc; do
+  echo "=== ${HN[$SID]} ($SID) ==="
+  IFJSON=$(maas admin interfaces read "$SID")
+  for NIC in enp8s0 enp9s0 enp10s0; do
+    cidr="${NIC_CIDR[$NIC]}"; prefix="${cidr%.0/22}"; ip="${prefix}.${HOST_OCTET[$SID]}"
+    ifid=$(echo "$IFJSON" | jq -r --arg n "$NIC" '.[]|select(.name==$n)|.id')
+    if [ -z "$ifid" ]; then echo "  $NIC: NOT FOUND -- inspect 'maas admin interfaces read $SID'"; continue; fi
+    linked=$(echo "$IFJSON" | jq -r --arg c "$cidr" --argjson id "$ifid" \
+              '[.[]|select(.id==$id).links[]?|select(.subnet.cidr==$c)]|length')
+    if [ "$linked" != "0" ]; then echo "  $NIC id=$ifid already on $cidr -- SKIP"; continue; fi
+    subid=$(maas admin subnets read | jq -r --arg c "$cidr" '.[]|select(.cidr==$c)|.id')
+    echo "  $NIC id=$ifid -> $ip (subnet id=$subid, $cidr)"
+    maas admin interface link-subnet "$SID" "$ifid" mode=STATIC subnet="$subid" ip_address="$ip"
+  done
+done
+
+# verify -- every host should now show data/storage/replication links
+for SID in 4na83t qdbqd6 h8frng tmsafc; do
+  echo "=== ${HN[$SID]} ($SID) ==="
+  maas admin interfaces read "$SID" \
+    | jq -r '.[] | select(.name|test("^enp(8|9|10)s0$")) | "  \(.name)\t\([.links[]?|{(.subnet.cidr):.ip_address}])"'
+done
+```
+GATE: each host's enp8s0/enp9s0/enp10s0 shows a 10.12.{12,16,20}.4N STATIC link.
+
+## Phase 4 -- MAAS VIP/FIP address carve (mutation; confirm-first)
+`# RUN: jumphost`  The bundle's VIPs live in the front-loaded /26 blocks; the FIP
+pool (phase-04) lives at 10.12.5.0-10.12.7.254. These MAAS reservations persist
+across teardown, so on a repeat rebuild they usually already exist -- verify, create
+only if absent, and delete the stale old-scheme reservation. (KI-P3-001: a reserved
+range stops MAAS auto-static landing a primary on a configured VIP.)
+```bash
+# 4a. verify current state
+maas admin ipranges read | jq -r '.[] | "id=\(.id)\t\(.type)\t\(.start_ip)-\(.end_ip)\tsubnet=\(.subnet.cidr // "?")\t\(.comment // "")"' | sort
+#   want present: provider .4.2-.63 (subnet 1), metal .8.2-.63 (subnet 2), provider FIP .5.0-.7.254.
+#   want absent : metal .8.224-.254 (stale).
+```
+```bash
+# 4b. create the front-loaded VIP reservations ONLY if absent (idempotent; carve doc section 8)
+( {
+  RANGES="$(maas admin ipranges read)"
+  [ -n "$RANGES" ] || { echo "ipranges read failed/empty -- ABORT (do not create blind)"; exit 1; }
+  # provider VIPs 10.12.4.2-.63 (subnet 1)
+  if printf '%s' "$RANGES" | jq -e '.[]|select(.start_ip=="10.12.4.2" and .end_ip=="10.12.4.63")' >/dev/null; then
+    echo "provider .4.2-.63 present -- SKIP"
+  else
+    maas admin ipranges create type=reserved subnet=1 start_ip=10.12.4.2 end_ip=10.12.4.63 \
+      comment="OpenStack public API HA VIPs (front-loaded /26; supersedes .224-.236)"
+  fi
+  # metal VIPs 10.12.8.2-.63 (subnet 2)
+  if printf '%s' "$RANGES" | jq -e '.[]|select(.start_ip=="10.12.8.2" and .end_ip=="10.12.8.63")' >/dev/null; then
+    echo "metal .8.2-.63 present -- SKIP"
+  else
+    maas admin ipranges create type=reserved subnet=2 start_ip=10.12.8.2 end_ip=10.12.8.63 \
+      comment="OpenStack internal/admin API HA VIPs (front-loaded /26; supersedes D-020 .224-.254)"
+  fi
+} )
+```
+```bash
+# 4c. delete the stale .224-.254 metal reservation -- CONFIRM the id from 4a first (this arc: id=2)
+#   maas admin iprange delete <stale-id>
+```
+GATE: `ipranges read` shows provider FIP + provider VIPs .4.2-.63 + metal VIPs
+.8.2-.63; the metal .8.224-.254 reservation is gone; the metal DHCP dynamic
+(10.12.9.0-10.12.11.254) is unchanged.
+
+## Phase 5 -- Post-prep verification (READ-ONLY gate before deploy)
+`# RUN: jumphost`
+```bash
+( {
+  juju spaces                                              # 5 spaces present
+  maas admin machines read | jq -r '.[]|select(.hostname|test("^openstack[0-3]$"))|"\(.hostname)\t\(.status_name)"' | sort   # all Ready
+  for SID in 4na83t qdbqd6 h8frng tmsafc; do echo "-- $SID --"
+    maas admin interfaces read "$SID" | jq -r '.[]|select(.name|test("^enp(8|9|10)s0$"))|"  \(.name)\t\([.links[]?|{(.subnet.cidr):.ip_address}])"'
+  done                                                     # data/storage/replication links on all four
+  for host in openstack0 openstack1 openstack2 openstack3; do
+    sudo qemu-img info "/var/lib/libvirt/images/${host}-1.qcow2" | grep -E 'virtual size|disk size'
+  done                                                     # OSD 512G blank
+} )
+```
+
+---
+
+## EXIT GATE (phase-00 complete)
+- `juju models` shows no `openstack`; openstack0-3 all Ready.
+- OSD vdb files 512 GiB blank (root:root, 600) on all four hosts.
+- enp8s0/enp9s0/enp10s0 linked (10.12.{12,16,20}.4N STATIC) on all four.
+- MAAS carve: front-loaded VIP /26 reserved on provider + metal; FIP pool reserved;
+  stale .224-.254 gone.
+- Clean slate ready for phase-01 (deploy). NOTE: the deploy uses ONE overlay
+  (octavia-pki only) -- NOT the vr0-dc0-testcloud overlay (R10; that overlay's intent
+  is folded into the hardened base bundle).
+
+## As-built reference (rebuild-prep arc -- audit trail)
+- Teardown D-018: `juju destroy-model openstack --force --no-wait --destroy-storage
+  --no-prompt`; release the four hosts by system_id (capi-mgmt left Ready).
+- OSD wipe proven 2026-05-22, re-run 2026-05-30: 512G blank, root:root, 600.
+- NIC links: enp8s0 found UNLINKED this arc (the hard prereq); enp9s0/enp10s0 already
+  linked. Reference enp8s0 ids (arc): openstack1=26, openstack2=32, openstack3=38;
+  openstack0 resolved dynamically (the block does not depend on these).
+- MAAS carve: front-loaded .2-.63 reservations created earlier and persistent; stale
+  metal .224-.254 was iprange id=2 (deleted after confirmation).
+
+## Next
+phase-01 -- bundle deploy.
diff --git a/runbooks/phase-01-bundle-deploy.md b/runbooks/phase-01-bundle-deploy.md
new file mode 100644
index 0000000..a6a28b9
--- /dev/null
+++ b/runbooks/phase-01-bundle-deploy.md
@@ -0,0 +1,297 @@
+# Phase 01 -- Bundle Deploy
+
+Deploy the hardened bundle + the octavia-pki overlay onto the freshly-prepped MAAS
+machines, and verify it settles to the expected PRE-vault-init state (zero errors,
+vault awaiting init, the TLS consumers awaiting vault certs). Vault init is phase-02.
+
+Decisions: B5 (IP-only), D-019 (no designate), D-020 (dual provider+metal VIPs),
+R14 (VIPs front-loaded to .50-.60), Section-G NIC bindings. Troubleshooting:
+appendix-A -- R14 (VIP relocation), R15 (.10 phantom resolver), L1 (no `set -e` on
+count-gate blocks), L3 (metal-side dual-VIP eyeball check), DOCFIX-016 (maas list leak).
+
+---
+
+## Prerequisites (must be true entering phase-01)
+- phase-00 done: 4 machines Ready/power=off; MAAS carve applied (front-loaded VIP /26
+  reserved, FIP pool reserved, stale iprange gone); enp8s0 data NIC linked on ALL four
+  hosts; OSD `/dev/vdb` wiped blank.
+- `overlays/octavia-pki.yaml` present (Step 1.0).
+- Hardened `bundle.yaml` in the working dir (channels pinned; VIPs `.50-.60`;
+  reserved-host-memory 8192; image-conversion; use-policyd-override).
+
+## Constants and env-literals
+- MAAS system_ids: openstack0=`4na83t`, openstack1=`qdbqd6`, openstack2=`h8frng`, openstack3=`tmsafc`.
+- MAAS subnet ids: 1=provider 10.12.4.0/22, 2=metal 10.12.8.0/22, 6=data 10.12.12.0/22,
+  7=storage 10.12.16.0/22, 8=replication 10.12.20.0/22, 9=lbaas 10.12.32.0/22.
+- expected plan: 50 apps, 97 relations, 4 machines (bundle 8/9/10/11 -> juju 0/1/2/3), 24 LXD.
+
+## Run-location legend
+- `# RUN: jumphost` -- `juju` + `maas admin` (MAAS profile is `admin`; never `maas list` -- DOCFIX-016).
+
+---
+
+## Step 1.0 -- Octavia PKI overlay (secret-handling prereq)  DISCRETE
+`overlays/octavia-pki.yaml` carries the 5 lb-mgmt-* PKI keys (controller CA/cert,
+issuing CA key+passphrase+cert). It is the ONLY overlay in the deploy command and is
+secret-safe + ASCII. PRIMARY path: reuse the existing validated overlay (the CAs are
+10y, so it survives rebuilds). REGENERATION path (fresh CAs): run the discrete secret
+procedure inlined as "Step 1.0-GEN" at the end of this phase. Either way, confirm the
+overlay parses and contains exactly the 5 keys (sanity block below) before deploying.
+```bash
+# RUN: jumphost -- sanity only (does NOT print key material)
+[ -f overlays/octavia-pki.yaml ] && grep -cE 'lb-mgmt-' overlays/octavia-pki.yaml   # expect 5 keys
+if LC_ALL=C grep -qP '[^\x00-\x7F]' overlays/octavia-pki.yaml; then echo "NON-ASCII"; else echo "ASCII clean"; fi   # -q: never echo a matching (possibly secret) line
+```
+
+## Step 1.1 -- Pre-deploy verify (read-only; 4 checks)
+`# RUN: jumphost`  One consolidated read-only block. NO `set -e` (a guarded count of
+0 is a valid answer, not a failure -- appendix-A: L1); count greps are `|| true`.
+```bash
+( {
+  echo "=== CHECK 1: bundle VIPs (quote-tolerant, octet-anchored) ==="
+  grep -nE '^[[:space:]]+vip:' bundle.yaml
+  TOT=$(grep -cE '^[[:space:]]+vip:[[:space:]]*"?10\.12\.4\.' bundle.yaml || true)
+  HI=$(grep -cE '^[[:space:]]+vip:[[:space:]]*"?10\.12\.4\.(5[0-9]|60)("|$|[[:space:]])' bundle.yaml || true)
+  LO=$(grep -cE '^[[:space:]]+vip:[[:space:]]*"?10\.12\.4\.(1[0-9]|20)("|$|[[:space:]])' bundle.yaml || true)
+  echo "  provider VIPs total=$TOT  in .50-.60=$HI  in .10-.20(stale)=$LO   (want 11/11/0)"
+  # metal side is the second token of each dual vip; eyeball that all 11 are .8.50-.60,
+  # clear of metal infra .8.10(maas)/.8.20(lxd)/.8.21(capi)/.8.30(juju) -- appendix-A: L3.
+
+  echo "=== CHECK 2: enp8s0 data NIC linked on ALL FOUR hosts (10.12.12.0/22) ==="
+  for SID in 4na83t qdbqd6 h8frng tmsafc; do
+    echo -n "  $SID: "
+    maas admin interfaces read "$SID" | jq -r '.[] | select(.name=="enp8s0")
+      | [.links[]? | select(.subnet.cidr=="10.12.12.0/22") | .ip_address] | join(",")'
+  done   # expect 10.12.12.40 / .41 / .42 / .43 (select by .subnet.cidr -> robust to id drift)
+
+  echo "=== CHECK 3: subnet DNS resolvers ==="
+  for ID in 1 2 6 7 8 9; do maas admin subnet read "$ID" | jq -c '{id,cidr,dns_servers}'; done
+  # expect subnet 1 (provider) -> [10.12.4.1]; 2/6/7/8/9 -> [10.12.8.1]
+
+  echo "=== CHECK 4a: nodes Ready / power off ==="
+  maas admin machines read | jq -r '.[] | select(.system_id|IN("4na83t","qdbqd6","h8frng","tmsafc"))
+    | "\(.hostname) \(.status_name) power=\(.power_state)"'
+} )
+```
+```bash
+# CHECK 4b: OSD /dev/vdb blank (RUN: jumphost = libvirt host; nodes are powered off, images are local; sudo -- appendix-A: R7)
+for h in openstack0 openstack1 openstack2 openstack3; do
+  echo "== $h =="
+  sudo qemu-img info "/var/lib/libvirt/images/${h}-1.qcow2" | grep -E 'virtual size|disk size'
+done   # expect virtual 512 GiB, disk ~KiB (sparse/blank)
+```
+GATE: VIPs 11/11/0; enp8s0 linked on all 4; subnet DNS as above; 4 nodes Ready; OSD blank.
+
+## Step 1.2 -- Dry-run (guarded)
+`# RUN: jumphost`  Refuse to add a model if `openstack` already exists; require the overlay.
+```bash
+( {
+  juju models 2>&1 | tee /tmp/jmodels.txt
+  if grep -qE '(^|[[:space:]]|/)openstack([[:space:]*]|$)' /tmp/jmodels.txt; then
+    echo "ABORT: an 'openstack' model already exists (teardown is phase-00)"; 
+  elif [ ! -f overlays/octavia-pki.yaml ]; then
+    echo "ABORT: overlays/octavia-pki.yaml missing (Step 1.0)";
+  else
+    juju add-model openstack
+    juju deploy ./bundle.yaml --overlay overlays/octavia-pki.yaml -m openstack --dry-run
+  fi
+} )
+```
+GATE (from the plan): 50 apps, 97 relations, 4 machines (8/9/10/11 -> 0/1/2/3), 24 LXD;
+ceph-osd/0-3 one per node; nova-compute/0-2 on machines 1/2/3 ONLY (machine 0 =
+OSD+LXD host, no compute); channels match the matrix; relations include
+`octavia:certificates - vault:certificates`, `vault:shared-db - vault-mysql-router`,
+`mysql-innodb-cluster:certificates - vault:certificates`; NO `vault:ha`, NO designate
+(D-019). Only the two benign R11 warnings (L34 `name`, L55 `variables`).
+
+## Step 1.3 -- Deploy (VIP-guarded)
+`# RUN: jumphost`  Re-run the VIP guard inline (the dry-run never echoes vip values),
+then deploy only if 11/11/0.
+```bash
+( {
+  TOT=$(grep -cE '^[[:space:]]+vip:[[:space:]]*"?10\.12\.4\.' bundle.yaml || true)
+  HI=$(grep -cE '^[[:space:]]+vip:[[:space:]]*"?10\.12\.4\.(5[0-9]|60)("|$|[[:space:]])' bundle.yaml || true)
+  LO=$(grep -cE '^[[:space:]]+vip:[[:space:]]*"?10\.12\.4\.(1[0-9]|20)("|$|[[:space:]])' bundle.yaml || true)
+  if [ "$TOT" = 11 ] && [ "$HI" = 11 ] && [ "$LO" = 0 ]; then
+    juju deploy ./bundle.yaml --overlay overlays/octavia-pki.yaml -m openstack
+  else
+    echo "ABORT: VIP guard failed (total=$TOT hi=$HI lo=$LO; want 11/11/0)"
+  fi
+} )
+```
+
+## Step 1.4 -- DNS gate during deploy (as machines come up)
+`# RUN: jumphost`  Run when machine 0 reaches `started`, then per LXD unit as they
+appear (flag BEFORE the target; logic inside the remote quotes; no outer 2>/dev/null):
+```bash
+juju ssh -m openstack 0 -- 'resolvectl status | grep -i "DNS Server"; getent hosts api.snapcraft.io && echo OK || echo FAIL'
+# repeat for ceph-mon/0, mysql-innodb-cluster/0 as they appear
+```
+GATE: each returns OK (api.snapcraft.io resolves -> the snap install storm proceeds
+clean). FINDING (non-blocking, R15): the unreachable region resolver `10.12.8.10`
+(MAAS region/rack controller, advertised on the metal VLAN independent of the subnet
+field) may still appear in a node's resolver list -- resolution succeeds because
+systemd-resolved deprioritizes `.10` and falls through to `.1`. Latent fragility if
+`.1` ever drops; understand/eliminate for Roosevelt. (appendix-A: R15.)
+
+---
+
+## EXIT GATE (phase-01 complete)
+- Deploy settled to the PRE-vault-init end state:
+  * ZERO units in `error`.
+  * mysql-innodb-cluster x3 ACTIVE ("Cluster is ONLINE").
+  * vault/0 BLOCKED "Vault needs to be initialized" (the phase-02 trigger, not a fault).
+  * Waiting on vault certs (expected pre-init): ovn-central x3, ovn-chassis x3
+    (incl nova-compute subordinates), ovn-chassis-octavia, neutron-api-plugin-ovn, barbican-vault.
+  * octavia BLOCKED "Awaiting configure-resources" (D-021); gss unknown (pre-run).
+- Section-G NIC payoff confirmed (no subset/binding errors): ceph-mon -> storage 10.12.16.x;
+  octavia -> data 10.12.12.1; nova-compute -> data 10.12.12.4x; vault -> metal 10.12.8.x.
+- Proceed to phase-02 (vault init).
+
+## As-built reference (2026-06-03 second redeploy -- audit trail)
+- `juju deploy ./bundle.yaml --overlay overlays/octavia-pki.yaml -m openstack` on maas/default (cred maas-api).
+- Plan: 50 apps / 97 relations / 4 machines / 24 LXD; placement as above.
+- Pre-deploy verify: VIPs 11/11/0; enp8s0 -> 10.12.12.40-43 (all 4); subnet DNS as above; nodes Ready; OSD blank.
+- Settled: zero errors; mysql /0 R/W (10.12.8.173), /1 (.179) /2 (.185) R/O; vault blocked needs-init.
+
+## Next
+phase-02 -- vault bring-up.
+
+---
+
+## Step 1.0-GEN -- Octavia management-PKI generation (regeneration path)  DISCRETE / SECRET
+Run ONLY if you are not reusing an existing `overlays/octavia-pki.yaml`. Produces the
+two-tier EC PKI for Charmed Octavia's amphora trust domain and writes the overlay.
+Decisions (Workstream 3a, 2026-05-22): fresh generation; EC P-384 CAs (SHA-384, 10y);
+EC P-256 controller cert (2y); overlay-file distribution (gitignored); artifacts under
+`$HOME/octavia-pki/`; passphrases = 32 random bytes base64 (44 chars). SECRET step --
+do NOT echo key material; the only printed values are cert dates/subjects and verify OK.
+
+The five `octavia` charm options the overlay sets:
+- `lb-mgmt-issuing-cacert`            = base64(issuing CA cert)
+- `lb-mgmt-issuing-ca-private-key`    = base64(issuing CA ENCRYPTED key)
+- `lb-mgmt-issuing-ca-key-passphrase` = the issuing CA passphrase (PLAIN string, NOT base64)
+- `lb-mgmt-controller-cacert`         = base64(controller CA cert)
+- `lb-mgmt-controller-cert`           = base64(controller cert + key, concatenated)
+
+### 1.0-GEN.0 -- workspace (openssl 3.x; $HOME only -- snap home-confinement, never /tmp)
+```bash
+# RUN: jumphost
+WORKDIR="$HOME/octavia-pki"
+mkdir -p "$WORKDIR"/issuing-ca "$WORKDIR"/controller-ca "$WORKDIR"/controller
+chmod 700 "$WORKDIR"
+openssl version    # expect OpenSSL 3.x
+```
+
+### 1.0-GEN.a -- Issuing CA (EC P-384, AES-256 encrypted key, self-signed 10y)
+```bash
+( {
+  WORKDIR="$HOME/octavia-pki"; cd "$WORKDIR/issuing-ca" || exit 1   # dir from 1.0-GEN.0
+  openssl rand -base64 32 | tr -d '\n' > passphrase.txt
+  chmod 600 passphrase.txt
+  test "$(wc -c < passphrase.txt)" -eq 44 || { echo "ABORT: issuing passphrase length != 44"; exit 1; }
+  openssl genpkey -algorithm EC -pkeyopt ec_paramgen_curve:P-384 \
+    -aes-256-cbc -pass file:passphrase.txt -out issuing-ca.key.enc
+  chmod 600 issuing-ca.key.enc
+  openssl req -new -x509 -sha384 -key issuing-ca.key.enc -passin file:passphrase.txt \
+    -days 3650 -subj "/CN=VR0 DC0 Omega Cloud Octavia Issuing CA/O=Neumatrix" \
+    -out issuing-ca.cert.pem
+  openssl x509 -in issuing-ca.cert.pem -noout -dates -subject
+  openssl verify -CAfile issuing-ca.cert.pem issuing-ca.cert.pem    # expect: OK
+} )
+```
+
+### 1.0-GEN.b -- Controller CA (EC P-384, AES-256 encrypted key, self-signed 10y; own passphrase)
+The controller CA key is encrypted (its own passphrase) for future controller-cert
+rotation -- Octavia never receives this key, only the controller CA cert.
+```bash
+( {
+  WORKDIR="$HOME/octavia-pki"; cd "$WORKDIR/controller-ca" || exit 1   # dir from 1.0-GEN.0
+  openssl rand -base64 32 | tr -d '\n' > passphrase.txt
+  chmod 600 passphrase.txt
+  test "$(wc -c < passphrase.txt)" -eq 44 || { echo "ABORT: controller passphrase length != 44"; exit 1; }
+  openssl genpkey -algorithm EC -pkeyopt ec_paramgen_curve:P-384 \
+    -aes-256-cbc -pass file:passphrase.txt -out controller-ca.key.enc
+  chmod 600 controller-ca.key.enc
+  openssl req -new -x509 -sha384 -key controller-ca.key.enc -passin file:passphrase.txt \
+    -days 3650 -subj "/CN=VR0 DC0 Omega Cloud Octavia Controller CA/O=Neumatrix" \
+    -out controller-ca.cert.pem
+  openssl x509 -in controller-ca.cert.pem -noout -dates -subject
+  openssl verify -CAfile controller-ca.cert.pem controller-ca.cert.pem    # expect: OK
+} )
+```
+
+### 1.0-GEN.c -- Controller cert (EC P-256 UNENCRYPTED, SAN, signed by Controller CA, 2y)
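+The P-256 key is unencrypted -- Octavia reads it at startup. SAN carries the controller
+FQDN, the octavia API FQDN, and the Octavia API VIP (10.12.4.233 below sits in the superseded .224-.236 block; confirm against the bundle's current `.50-.60` octavia vip before regenerating).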
+```bash
+( {
+  WORKDIR="$HOME/octavia-pki"; cd "$WORKDIR/controller" || exit 1   # dir from 1.0-GEN.0
+  openssl genpkey -algorithm EC -pkeyopt ec_paramgen_curve:P-256 -out controller.key
+  chmod 600 controller.key
+  cat > controller.cnf <<'CNF'
+[req]
+distinguished_name = req_distinguished_name
+req_extensions = v3_req
+prompt = no
+
+[req_distinguished_name]
+CN = octavia-controller.omega.dc0.vr0.cloud.neumatrix.local
+O = Neumatrix
+
+[v3_req]
+keyUsage = critical, digitalSignature, keyEncipherment
+extendedKeyUsage = clientAuth, serverAuth
+subjectAltName = @alt_names
+
+[alt_names]
+DNS.1 = octavia-controller.omega.dc0.vr0.cloud.neumatrix.local
+DNS.2 = octavia.omega.dc0.vr0.cloud.neumatrix.local
+IP.1 = 10.12.4.233
+CNF
+  openssl req -new -sha256 -key controller.key -config controller.cnf -out controller.csr
+  openssl x509 -req -sha256 -in controller.csr \
+    -CA ../controller-ca/controller-ca.cert.pem \
+    -CAkey ../controller-ca/controller-ca.key.enc \
+    -passin file:../controller-ca/passphrase.txt \
+    -CAcreateserial -days 730 \
+    -extfile controller.cnf -extensions v3_req \
+    -out controller.cert.pem
+  openssl verify -CAfile ../controller-ca/controller-ca.cert.pem controller.cert.pem  # expect: OK
+  openssl x509 -in controller.cert.pem -noout -ext subjectAltName     # DNS x2 + IP present
+  openssl x509 -in controller.cert.pem -noout -dates
+  cat controller.cert.pem controller.key > controller.bundle.pem
+  chmod 600 controller.bundle.pem
+} )
+```
+
+### 1.0-GEN.d -- Write overlays/octavia-pki.yaml (base64 blobs + plaintext passphrase)
+Four values are base64(PEM); the issuing-CA passphrase is a PLAIN string. The file is
+gitignored. Set `$REPO` to the jumphost clone (the dir holding bundle.yaml + overlays/).
+```bash
+( {
+  WORKDIR="$HOME/octavia-pki"; cd "$WORKDIR" || exit 1   # dir from 1.0-GEN.0
+  REPO="${REPO:-$HOME/openstack-caracal-ipv4}"   # adjust to the actual clone path
+  mkdir -p "$REPO/overlays"; umask 077   # overlay embeds the PLAINTEXT passphrase: create it owner-only (subshell-local umask)
+  ISS_CERT=$(base64 -w0 issuing-ca/issuing-ca.cert.pem)
+  ISS_KEY=$(base64 -w0 issuing-ca/issuing-ca.key.enc)
+  ISS_PASS=$(cat issuing-ca/passphrase.txt)
+  CON_CACERT=$(base64 -w0 controller-ca/controller-ca.cert.pem)
+  CON_CERT=$(base64 -w0 controller/controller.bundle.pem)
+  cat > "$REPO/overlays/octavia-pki.yaml" <<OVL
+applications:
+  octavia:
+    options:
+      lb-mgmt-issuing-cacert: "$ISS_CERT"
+      lb-mgmt-issuing-ca-private-key: "$ISS_KEY"
+      lb-mgmt-issuing-ca-key-passphrase: "$ISS_PASS"
+      lb-mgmt-controller-cacert: "$CON_CACERT"
+      lb-mgmt-controller-cert: "$CON_CERT"
+OVL
+  chmod 600 "$REPO/overlays/octavia-pki.yaml"
+  echo "wrote $REPO/overlays/octavia-pki.yaml"
+} )
+```
+Then run the Step 1.0 sanity block (5 keys present; ASCII clean) before deploying. Keep
+`$HOME/octavia-pki/` (the CA keys + passphrases) OFF the repo and backed up securely; the
+10y CAs are reused across rebuilds -- regenerate only on key compromise or CA expiry.
diff --git a/runbooks/phase-02-vault-bringup.md b/runbooks/phase-02-vault-bringup.md
new file mode 100644
index 0000000..fc03cf4
--- /dev/null
+++ b/runbooks/phase-02-vault-bringup.md
@@ -0,0 +1,127 @@
+# Phase 02 -- Vault Bring-up (PKI root; secret-handling)
+
+Initialize, unseal, and authorize Vault -- the cloud's PKI/CA root. This is the
+SECRET-HANDLING phase: every step is DISCRETE and individually gated (never
+batched), secrets go through hidden prompts (never on argv / in a var / in
+scrollback / in a juju action log), and the init key material is saved OFF-HOST
+immediately.
+
+Decisions: vault-on-mysql backend (etcd/easyrsa dropped -- C1). Troubleshooting:
+appendix-A -- DOCFIX-006 (init one-shot capture), DOCFIX-011 (authorize-charm token),
+DOCFIX-014 (generate-root-ca required), L4 (unseal via hidden prompt), R3 (HA Enabled
+false is correct here).
+
+!!! IRREVERSIBLE ONE-SHOT -- `vault operator init` runs EXACTLY ONCE per vault. It
+    prints the 5 unseal-key shares and the root token ONE TIME ONLY. A plain `>`
+    redirect captures stdout only, shows nothing on screen, and loses stderr, so you
+    can be left with NOTHING (DOCFIX-006 / the B15 incident). Run the init command
+    VERBATIM as written in 2.1 (`2>&1 | tee`), confirm the gate, and SAVE the captured
+    file off-host before doing anything else. Lost shares = unrecoverable vault.
+
+---
+
+## Prerequisites (must be true entering phase-02)
+- phase-01 done: bundle deployed; mysql-innodb-cluster ACTIVE (vault's backend --
+  it bootstrapped before vault init).
+- vault/0 sits BLOCKED needing initialization (a fresh, uninitialized vault).
+
+## Constants and env-literals
+- vault loopback: `http://127.0.0.1:8200` (on the unit; NOT a VIP -- this is B14's
+  on-unit loopback model, deliberately not the jumphost-CLI + unit-IP path).
+- key-shares=5, key-threshold=3.
+
+## Run-location legend
+- `# RUN: on vault/0` -- inside an interactive `juju ssh -m openstack vault/0` session
+  (the init/unseal/token-mint steps need a tty for hidden prompts -- do NOT pipe them).
+- `# RUN: jumphost` -- `juju run` client calls (authorize / generate-root-ca / status).
+
+---
+
+## Step 2.1 -- Vault init  [IRREVERSIBLE ONE-SHOT -- run verbatim]  DISCRETE
+`# RUN: on vault/0`  Open the session, set the loopback addr, pre-check fresh, then
+init with the `2>&1 | tee` capture (NOT `>`). Save `~/vault-init/init.txt` off-host
+the moment the gate passes.
+```bash
+juju ssh -m openstack vault/0   # opens the interactive session; every line below is typed inside it
+# --- inside the vault/0 session: ---
+export VAULT_ADDR=http://127.0.0.1:8200 ; umask 077 ; mkdir -p ~/vault-init   # umask first: dir + init.txt are created owner-only
+vault status 2>&1 | grep -E 'Initialized|Sealed|Storage Type|HA Enabled' || true   # pre-check: Initialized false (fresh)
+vault operator init -key-shares=5 -key-threshold=3 2>&1 | tee ~/vault-init/init.txt # DOCFIX-006: 2>&1|tee, NEVER '>'
+grep -c '^Unseal Key' ~/vault-init/init.txt                                         # GATE: MUST print 5
+grep -q '^Initial Root Token:' ~/vault-init/init.txt && echo TOKEN_OK || echo MISSING   # GATE: MUST print TOKEN_OK
+```
+GATE: `5` unseal keys AND `TOKEN_OK`. If the count is not 5 or the token is MISSING,
+STOP -- do not proceed (the empty-file case is the DOCFIX-006 catch). Now SAVE the 5
+shares + root token off-host (operator secret store) before continuing. Do NOT batch
+this with unseal.
+
+## Step 2.2 -- Vault unseal (3 of 5)  DISCRETE  (re-runnable)
+`# RUN: on vault/0`  Use Vault's OWN hidden prompt -- the key is never on the command
+line, in a var, or in scrollback (appendix-A: L4). Do NOT use `vault operator unseal $K`
+(that puts the key in `ps`/argv).
+```bash
+# --- inside the vault/0 session: ---
+export VAULT_ADDR=http://127.0.0.1:8200
+vault operator unseal      # prompts hidden; paste share 1   -> Unseal Progress 1/3
+vault operator unseal      # prompts hidden; paste share 2   -> 2/3
+vault operator unseal      # prompts hidden; paste share 3   -> 3/3
+vault status 2>&1 | grep -E 'Sealed|Initialized|Storage Type|HA Enabled'
+```
+GATE: progress 1/3 -> 2/3 -> 3/3, then `Sealed false`. Expected final: Initialized
+true / Sealed false / Storage Type mysql / **HA Enabled false** (CORRECT for
+single-unit vault-on-mysql -- appendix-A: R3; any "HA true / etcd" reference is stale).
+
+NOTE (unseal policy, v1): MANUAL unseal is the v1 standard -- after any vault unit
+reboot, re-run this 3-of-5 step at the hidden prompt. Auto-unseal (e.g. a transit/KMS
+seal so the unit returns unsealed after a reboot) is an available option, adopted
+case-by-case; it is NOT configured in v1. D-011.6 (phase-08) re-confirms manual unseal.
+
+## Step 2.3 -- Authorize-charm + generate-root-ca  DISCRETE
+First confirm the action schema (DOCFIX-011), then authorize with a SHORT-LIVED CHILD
+token (not the root token -- `juju run` persists action params in the operation log,
+so a minutes-lived token self-limits), then generate the root CA (DOCFIX-014 -- without
+it vault stays blocked "Missing CA cert").
+```bash
+# RUN: jumphost -- schema (read-only): authorize-charm requires `token` (direct-token path)
+juju actions vault --schema --format yaml -m openstack | sed -n '/authorize-charm:/,/^[a-z]/p'
+```
+```bash
+# RUN: on vault/0 -- mint a short-lived child token (root entered hidden, never on argv/history)
+juju ssh -m openstack vault/0
+# --- inside the session: ---
+export VAULT_ADDR=http://127.0.0.1:8200
+read -rs -p "root token: " VAULT_TOKEN; echo ; export VAULT_TOKEN   # -s: no echo; -r: take the pasted token literally (no backslash processing)
+vault token create -ttl=10m -field=token        # prints ONLY the child token -- copy it
+unset VAULT_TOKEN                               # drop the root token from this session's environment before leaving
+exit
+```
+```bash
+# RUN: jumphost -- authorize + root CA + status (each juju run blocks to completion)
+juju run vault/leader authorize-charm token=<short-lived-child-token> -m openstack
+juju run vault/leader generate-root-ca -m openstack
+juju status vault -m openstack
+```
+GATE: authorize-charm completes; generate-root-ca returns the root CA PEM ("Vault Root
+Certificate Authority (charm-pki-local)"); vault/0 -> active/idle "Unit is ready". The
+"Missing CA cert" block clears straight to active (validates DOCFIX-014).
+(`mlock: disabled` is expected/benign for snap/container vault without IPC_LOCK.)
+
+---
+
+## EXIT GATE (phase-02 complete)
+- Vault Initialized true / Sealed false; 5 shares + root token saved OFF-HOST.
+- vault/0 active/idle; root CA generated (the cloud's PKI anchor).
+- The narrow cert cascade to the Vault consumers (ovn-central x3, ovn-chassis x3,
+  ovn-chassis-octavia, neutron-api-plugin-ovn, barbican-vault) now proceeds -- it is
+  watched and accepted in phase-03.
+
+## As-built reference (2026-06-03 run -- audit trail)
+- init: 5 shares / threshold 3, "Vault initialized with 5 key shares and a key
+  threshold of 3"; captured via `2>&1 | tee ~/vault-init/init.txt`.
+- unseal: 1/3 -> 2/3 -> 3/3 -> Sealed false; Storage Type mysql; HA Enabled false;
+  Version 1.8.8, vault-cluster-872a43d1.
+- authorize: op 1/task 2 OK (short-lived child token); generate-root-ca op 3/task 4
+  returned the root CA (valid 2026-06-03 -> 2036-05-31); vault/0 + vault-mysql-router active.
+
+## Next
+phase-03 -- core verify (cert-cascade settle, admin-openrc, Horizon).
diff --git a/runbooks/phase-03-core-verify.md b/runbooks/phase-03-core-verify.md
new file mode 100644
index 0000000..38d2ddb
--- /dev/null
+++ b/runbooks/phase-03-core-verify.md
@@ -0,0 +1,177 @@
+# Phase 03 -- Core Verify (settle, admin-openrc, Horizon)
+
+After vault's cert cascade (phase-02), confirm the cloud settled to active/idle
+(except the expected post-deploy blocks), build the IP-only `admin-openrc`, verify
+API reachability, and repoint the external Horizon reverse proxy.
+
+Decisions: B5 (IP-only endpoints; no FQDN), D-021 (octavia stays BLOCKED awaiting
+configure-resources -- expected, cleared in phase-05). Troubleshooting: appendix-A --
+DOCFIX-021 (action human-output corrupts captured artifacts), DOCFIX-018 (IP-only
+OS_AUTH_URL), DOCFIX-022 (admin project discovered, not hardcoded).
+
+---
+
+## Prerequisites (must be true entering phase-03)
+- phase-02 done: vault unsealed + authorized; root CA generated; the cert cascade is
+  running/settling.
+- The Vault root CA is available via the vault charm action (pulled below).
+
+## Constants and env-literals (TAG: confirm per site on rebuild)
+- `ENV(keystone-vip)` 10.12.4.50      (keystone PUBLIC endpoint = provider VIP; verify vs bundle)
+- `ENV(admin-domain)` admin_domain    (charmed-keystone admin user + project domain)
+- `ENV(dashboard-vip)` 10.12.4.58     (Horizon provider VIP; was .234 pre-R14)
+- admin project: DISCOVERED at runtime (do not hardcode -- DOCFIX-022).
+
+## Run-location legend
+- `# RUN: jumphost` -- vopenstack-jesse as jessea123; `juju` + `openstack` + `openssl`.
+
+---
+
+## Step 3.1 -- Settle the cert cascade + acceptance walk
+`# RUN: jumphost`  The cascade here is NARROW (mysql bootstrapped before vault init,
+so only the Vault consumers clear: ovn-central x3, ovn-chassis x3,
+ovn-chassis-octavia, neutron-api-plugin-ovn, barbican-vault). Watch, then walk units
+AND subordinates.
+```bash
+juju status --color --watch 30s -m openstack     # Ctrl-C once settled
+```
+Acceptance walk (counts non-active/idle across units + subordinates):
+```bash
+juju status -m openstack --format=yaml | python3 -c "
+import sys,yaml   # flag every unit/subordinate that is not workload=active AND juju=idle
+status=yaml.safe_load(sys.stdin); flagged=[]
+def inspect(name,unit):
+    wl=unit.get('workload-status') or {}; ag=unit.get('juju-status') or {}
+    cur=(wl.get('current',''),ag.get('current',''))
+    if cur!=('active','idle'): flagged.append('%s: workload=%s juju=%s msg=%s'%(name,cur[0],cur[1],wl.get('message','')))
+for app in status.get('applications',{}).values():
+    for uname,unit in (app.get('units') or {}).items():
+        inspect(uname,unit)
+        for sname,sub in (unit.get('subordinates') or {}).items(): inspect(sname,sub)
+print('Non-active/idle units: %d'%len(flagged))
+for line in flagged: print('  '+line)
+"
+```
+GATE: expected non-active/idle = **1** (octavia/0 BLOCKED "Awaiting configure-resources",
+the D-021 next step) or briefly **2** (+ glance-simplestreams-sync, normal pre-run).
+Any TLS consumer (the five above) persisting waiting/error past ~15 min is the concern
+-- STOP and read its log + relations (do NOT assume TLS; a prior stall was a MySQL 1045
+desync):
+```bash
+juju status --relations -m openstack ovn-central ovn-chassis ovn-chassis-octavia neutron-api-plugin-ovn barbican-vault
+# juju ssh -m openstack <unit> -- 'sudo tail -120 /var/log/juju/unit-<unit-dashed>.log' </dev/null
+```
+
+## Step 3.2 -- Build admin-openrc (IP-only; canonical block)
+`# RUN: jumphost`  Keystone PUBLIC = the provider VIP IP over HTTPS with the vault
+CA (no FQDN, no /etc/hosts -- B5). This canonical block folds in three fixes:
+the CA is pulled via `--format json` + jq because the action's human output wraps the
+PEM in an INDENTED YAML block that is not valid PEM (appendix-A: DOCFIX-021); the
+OS_AUTH_URL is the VIP IP (DOCFIX-018); and the admin project is DISCOVERED by a
+scope-test loop rather than hardcoded, because the scoping project name varies by
+charm rev (DOCFIX-022 -- the cause of a prior HTTP 401). `( set -e )` keeps
+OS_PASSWORD inside the subshell and aborts cleanly on any failure.
+
+```bash
+KEYSTONE_VIP=10.12.4.50              # keystone PUBLIC endpoint = provider VIP (verify vs bundle on rebuild)
+ADMIN_DOMAIN=admin_domain            # charmed-keystone admin user + project domain
+PROJECT_CANDIDATES="admin admin_domain"   # tried in order; first that SCOPES wins (DOCFIX-022 variance)
+CA="$HOME/vault-init/vault-ca-root.pem"
+RC="$HOME/admin-openrc"
+
+( set -e   # subshell: ADMIN_PASS / OS_PASSWORD never reach the calling shell; first failing command aborts
+  mkdir -p "$HOME/vault-init"
+  # 1. Vault root CA -> file (JSON extract; DOCFIX-021 -- human output indents the PEM)
+  juju run vault/leader get-root-ca -m openstack --format json \
+    | jq -r '[.. | strings | select(test("-----BEGIN CERTIFICATE-----"))][0]' > "$CA"
+  openssl x509 -in "$CA" -noout -subject -dates
+  # 2. Admin password -> var (JSON extract, not human output)
+  ADMIN_PASS=$(juju run keystone/leader get-admin-password -m openstack --format json | python3 -c "
+import json,sys
+d=json.load(sys.stdin)
+def f(o):
+    if isinstance(o,dict):
+        for k in ('admin-password','password','Stdout'):
+            if k in o and o[k]: return str(o[k]).strip()
+        for v in o.values():
+            r=f(v)
+            if r: return r
+    elif isinstance(o,list):
+        for v in o:
+            r=f(v)
+            if r: return r
+    return ''
+print(f(d))
+")
+  [ -n "$ADMIN_PASS" ] || { echo "FATAL: password extract failed"; exit 1; }
+  # 3. PROJECT LOOKUP: first candidate that issues a SCOPED token wins (DOCFIX-022)
+  export OS_AUTH_URL="https://${KEYSTONE_VIP}:5000/v3" OS_USERNAME=admin OS_PASSWORD="$ADMIN_PASS"
+  export OS_USER_DOMAIN_NAME="$ADMIN_DOMAIN" OS_PROJECT_DOMAIN_NAME="$ADMIN_DOMAIN"
+  export OS_IDENTITY_API_VERSION=3 OS_REGION_NAME=RegionOne OS_CACERT="$CA"
+  ADMIN_PROJECT=""
+  for P in $PROJECT_CANDIDATES; do
+    if OS_PROJECT_NAME="$P" openstack token issue >/dev/null 2>&1; then ADMIN_PROJECT="$P"; break; fi
+  done
+  [ -n "$ADMIN_PROJECT" ] || { echo "FATAL: no candidate project scoped (tried: $PROJECT_CANDIDATES)"; exit 1; }
+  echo "[OK] admin project = $ADMIN_PROJECT ; password len ${#ADMIN_PASS}"
+  umask 077   # 4. Write ~/admin-openrc owner-only FROM CREATION (backs up any existing one first)
+  [ -f "$RC" ] && mv "$RC" "$RC.pre-$(date -u +%Y%m%dT%H%M%SZ)"
+  cat > "$RC" <<EOF
+export OS_AUTH_URL=https://${KEYSTONE_VIP}:5000/v3
+export OS_USERNAME=admin
+export OS_PASSWORD='$ADMIN_PASS'
+export OS_PROJECT_NAME=$ADMIN_PROJECT
+export OS_USER_DOMAIN_NAME=$ADMIN_DOMAIN
+export OS_PROJECT_DOMAIN_NAME=$ADMIN_DOMAIN
+export OS_IDENTITY_API_VERSION=3
+export OS_REGION_NAME=RegionOne
+export OS_CACERT=$CA
+EOF
+  chmod 600 "$RC"   # belt-and-braces: the umask above already made the new file 0600
+)
+# 5. Verify from the written file (password stayed inside the subshell above)
+( source "$RC"; echo "auth -> $OS_AUTH_URL  project=$OS_PROJECT_NAME"; openstack token issue 2>&1 | head -6 )
+( source "$RC"; openstack endpoint list -f value -c "Service Name" -c Interface -c URL 2>&1 | sort )
+```
+GATE: `token issue` returns a SCOPED token; `endpoint list` is IP-only across all
+services (public on the provider VIP `.5x`, internal+admin on the metal VIP `.8.5x`,
+keystone admin on `:35357`). Two non-blocking notes for later: s3/swift is registered
+on the radosgw VIP `.60:443` (re-check vs the radosgw `:80` listener during any
+Swift/S3 smoke); the gss image-stream is HTTP on metal `10.12.8.172`.
+
+## Step 3.3 -- Horizon access via the external nginx reverse proxy
+`# RUN: operator (outside the Juju model)`  Horizon is fronted by an
+operator-managed nginx reverse proxy. On each rebuild / VIP relocation, repoint its
+upstream to the CURRENT dashboard provider VIP (now `https://10.12.4.58`, was `.234`
+pre-R14). Verify two interplays:
+- ALLOWED_HOSTS: Horizon (bundle B5 setting) must permit whatever Host header reaches
+  it, else HTTP 400 DisallowedHost. Either set the proxy `proxy_set_header Host` to the
+  dashboard VIP, or add the proxy hostname to Horizon ALLOWED_HOSTS.
+- Upstream TLS: the dashboard cert is vault-signed for the VIP IP (IP-SAN). The proxy
+  must trust the vault root CA (`~/vault-init/vault-ca-root.pem`) for `proxy_ssl_verify`,
+  or terminate/re-encrypt per policy.
+LIVE-REVIEW: the proxy host + config path + reload command are operator-managed and
+not captured here -- record them verbatim when wired, and confirm an external GET
+reaches the Horizon login. (Roosevelt: this repoint folds into the access/DNS workstream.)
+
+---
+
+## EXIT GATE (phase-03 complete)
+- Cloud settled: acceptance walk shows only the expected block(s) (octavia; maybe gss).
+- `~/admin-openrc` (0600) authenticates and returns a SCOPED token; endpoint list IP-only.
+- Vault root CA at `~/vault-init/vault-ca-root.pem` validates TLS to the keystone VIP.
+- Horizon reachable through the repointed reverse proxy.
+
+## As-built reference (2026-06-03 run -- audit trail)
+- Cascade settled ~04:15Z: all five Vault consumers active/idle; only expected
+  non-active/idle = octavia (blocked, D-021) + gss (pre-run). mysql primary on
+  mysql-innodb-cluster/1 (R/W), /0+/2 R/O (normal innodb-cluster).
+- admin-openrc IP-only: OS_AUTH_URL=https://10.12.4.50:5000/v3, OS_USERNAME=admin,
+  OS_PROJECT_NAME=admin (scoped; project_id 65ce73e6798e4d1e8dd066609b7033ef),
+  domains admin_domain, OS_CACERT=~/vault-init/vault-ca-root.pem.
+- Vault root CA: subject "Vault Root Certificate Authority (charm-pki-local)",
+  notBefore 2026-06-03, notAfter 2036-05-31; TLS to 10.12.4.50:5000 OK (B5 IP-SAN holds).
+- Dashboard VIP 10.12.4.58 (nginx upstream repoint pending operator capture).
+
+## Next
+phase-04 -- network carve (external provider network).
diff --git a/runbooks/phase-04-network-carve.md b/runbooks/phase-04-network-carve.md
new file mode 100644
index 0000000..107da29
--- /dev/null
+++ b/runbooks/phase-04-network-carve.md
@@ -0,0 +1,128 @@
+# Phase 04 -- Network Carve (provider external network + IPAM reference)
+
+Create the Neutron external provider network that sources floating IPs and tenant
+router gateways (the FIP/ext_net leg of Option B), on top of the MAAS address carve
+done pre-deploy in phase-00. This doc is also the IPAM reference for where addresses live.
+
+Decisions: D-003 (provider shared-L2: public API VIPs + FIP/ext_net, Option B;
+FIP pool 10.12.5.0-10.12.7.254), the IPv4 provider/internal carve (front-loaded
+VIP /26), KI-P3-001 (VIP/primary collision -> the reserved-range fix).
+Troubleshooting: appendix-A -- KI-P3-001.
+
+NOTE -- the "carve" is split across three places:
+- The MAAS ADDRESS carve (delete stale iprange 2; reserve the front-loaded VIP /26
+  on provider + metal) runs POST-TEARDOWN / PRE-REDEPLOY -- it is in phase-00, because
+  you must reserve the VIP block before deploying onto it.
+- The bundle's `vip:` values come from that reserved block -- phase-01.
+- THIS phase creates the Neutron EXTERNAL provider network on top of the carve --
+  the only post-deploy network mutation.
+
+---
+
+## IPAM carve reference (design; full detail in the carve doc + design-decisions D-003)
+Provider 10.12.4.0/22 (role Provider; shared-L2, Option B):
+- 10.12.4.1                  provider gateway
+- 10.12.4.2 - 10.12.4.63     public API HA VIPs (front-loaded /26) -- MAAS RESERVED; EXCLUDED from the
+                             neutron allocation_pool. Every bundle public `vip:` is from here.
+- 10.12.4.64 - 10.12.4.254   host + container primaries (MAAS auto-static)
+- 10.12.5.0 - 10.12.7.254    FIP pool / ext_net allocation_pool (this phase's subnet) -- MAAS RESERVED
+
+Metal 10.12.8.0/22 (role Metal; charm control plane + internal VIPs):
+- 10.12.8.2 - 10.12.8.63     internal/admin API HA VIPs (front-loaded /26) -- MAAS RESERVED
+- 10.12.8.64 - 10.12.8.254   host + container primaries (incl single-unit svc endpoints, e.g. radosgw)
+- 10.12.9.0 - 10.12.11.254   MAAS PXE/enlistment DHCP (dynamic; iprange id 1)
+
+KI-P3-001 invariant: on every space carrying juju VIPs (provider AND metal), the VIP
+block is MAAS-reserved and DISTINCT from the primary range and any neutron
+allocation_pool, so a MAAS auto-static primary can never land on a configured VIP.
+(Root cause of the original collision: provider had NO VIP reservation, so MAAS
+auto-assigned container primaries .225/.226/.227 onto the .224-.236 VIP block.)
+
+## Prerequisites (must be true entering phase-04)
+- phase-01/02/03 done (deploy + vault + core verify); charms active/idle.
+- phase-00 MAAS carve applied: FIP pool 10.12.5.0-10.12.7.254 RESERVED on the provider
+  subnet (iprange id 3), and the front-loaded VIP /26 reservations present.
+- Provider segment is FLAT on physnet1 (bundle ovn-bridge-mappings physnet1:br-ex;
+  flat-network-providers=physnet1). The provider /22 is untagged L2 (not vlan).
+
+## Constants and env-literals (TAG: confirm per site on rebuild)
+- `ENV(physnet)`    physnet1
+- `ENV(ext-net)`    provider-ext        `ENV(ext-subnet)` provider-ext-fip
+- `ENV(ext-cidr)`   10.12.4.0/22  (full provider /22 so .1 gateway is in-subnet + FIP ARP spans the L2)
+- `ENV(fip-pool)`   10.12.5.0 - 10.12.7.254   (D-003 Option-A; ~765 FIPs; full pool, not a slice)
+- gateway 10.12.4.1 -- READ DYNAMICALLY from MAAS, never hardcoded.
+
+## Run-location legend
+- `# RUN: jumphost` -- vopenstack-jesse as jessea123, admin-openrc sourced; `openstack` + `maas admin`.
+
+---
+
+## Step 4.1 -- Create the external provider network (B29; idempotent)
+`# RUN: jumphost`  `--external` but NOT `--share` (usable as router gateway + FIP
+source, but tenants cannot attach instance ports to the provider segment -- Option B
+isolation). `--no-dhcp` (MAAS owns DHCP on this segment; FIPs are NAT'd). The subnet
+is the FULL provider /22 with the FIP pool as the allocation_pool; the VIP block and
+primaries are MAAS-reserved so neutron never allocates them.
+
+Read-only pre-check first (verify the FIP pool is MAAS-reserved so neutron can own it):
+```bash
+# RUN: jumphost (MAAS profile is 'admin'; never run 'maas list' -- it prints the API key, DOCFIX-016)
+maas admin ipranges read | jq -r '.[] | select(.type=="reserved") | "\(.start_ip)-\(.end_ip) subnet=\(.subnet.id) [\(.comment)]"'
+# expect a reserved 10.12.5.0-10.12.7.254 on subnet id 1 (provider); + the front-loaded VIP /26 reservations.
+```
+Create (idempotent `( set -e )`; dynamic gateway; tags applied via `set`, not an
+inline `--tag` flag):
+```bash
+source ~/admin-openrc
+( set -e   # subshell: the first failing command aborts the carve without closing the operator's shell
+  PHYSNET=physnet1; EXT_NET=provider-ext; EXT_SUBNET=provider-ext-fip
+  EXT_CIDR=10.12.4.0/22; FIP_START=10.12.5.0; FIP_END=10.12.7.254
+  GW=$(maas admin subnet read 1 | jq -r '.gateway_ip')                 # dynamic; never hardcode .1
+  [ "$GW" = "10.12.4.1" ] || { echo "GATE FAIL: MAAS provider gateway='$GW' (expected 10.12.4.1)"; exit 1; }
+  echo "[OK] gateway $GW"
+  if openstack network show "$EXT_NET" -f value -c id >/dev/null 2>&1; then
+    echo "[SKIP] network $EXT_NET exists"
+  else
+    openstack network create --external --provider-network-type flat \
+      --provider-physical-network "$PHYSNET" "$EXT_NET" -f value -c id
+    echo "[OK] network $EXT_NET created"
+  fi
+  openstack network set --tag role=provider "$EXT_NET"   # outside the if: a re-run repairs a created-but-untagged network
+  if openstack subnet show "$EXT_SUBNET" -f value -c id >/dev/null 2>&1; then
+    echo "[SKIP] subnet $EXT_SUBNET exists"
+  else
+    openstack subnet create --network "$EXT_NET" --subnet-range "$EXT_CIDR" \
+      --gateway "$GW" --no-dhcp --allocation-pool start="$FIP_START",end="$FIP_END" \
+      "$EXT_SUBNET" -f value -c id
+    echo "[OK] subnet $EXT_SUBNET created"
+  fi
+  openstack subnet set --tag role=provider --tag "netbox-iprange=${FIP_START}-${FIP_END}" "$EXT_SUBNET"   # --tag adds to existing tags; repeatable
+  echo "=== CONFIRM ==="
+  openstack network show "$EXT_NET" -f json | jq -c '{name, external: ."router:external", type: ."provider:network_type", physnet: ."provider:physical_network", shared, tags}'
+  openstack subnet show "$EXT_SUBNET" -f json | jq -c '{name, cidr, gateway_ip, enable_dhcp, allocation_pools, tags}'
+)
+```
+GATE: `provider-ext` external=true, type=flat, physnet=physnet1, shared=false;
+`provider-ext-fip` cidr=10.12.4.0/22, gateway 10.12.4.1, enable_dhcp=false,
+allocation_pools=[10.12.5.0-10.12.7.254].
+
+---
+
+## EXIT GATE (phase-04 complete)
+- `provider-ext` (external, flat/physnet1, not shared) + `provider-ext-fip` (full /22,
+  FIP allocation pool, no-dhcp) present and tagged role=provider.
+- FIP allocation + tenant router gateways are now possible (needed by phase-06 mgmt
+  VM FIP, phase-08 cluster FIPs + LB validation).
+
+## As-built reference (2026-06-03 run -- audit trail)
+- network provider-ext = 70b34bb2-3afb-4b43-96d3-f520dbcbf9a8 (external, flat, physnet1, shared=false, role=provider)
+- subnet provider-ext-fip = e3afcbae-ec34-4125-9007-2bfa51851422
+  (cidr 10.12.4.0/22, gateway 10.12.4.1, enable_dhcp=false, alloc 10.12.5.0-10.12.7.254,
+   tags role=provider + netbox-iprange=10.12.5.0-10.12.7.254)
+- Transitional note: MAAS already carried the front-loaded VIP reservations (.2-.63
+  provider + .8.2-.63 metal; old D-020 .8.224-.254 gone) ahead of the bundle's interim
+  .50-.60 VIPs -- harmless (a reserved range blocks future auto-assign, does not evict
+  live VIPs). NetBox modeling DEFERRED (allocate after a clean deploy).
+
+## Next
+phase-05 -- octavia enablement.
diff --git a/runbooks/phase-05-octavia-enablement.md b/runbooks/phase-05-octavia-enablement.md
new file mode 100644
index 0000000..d89fb27
--- /dev/null
+++ b/runbooks/phase-05-octavia-enablement.md
@@ -0,0 +1,191 @@
+# Phase 05 -- Octavia Enablement (D-021)
+
+Bring Octavia from its post-deploy BLOCKED state to fully enabled: run the
+`configure-resources` action (control plane + lb-mgmt overlay), then build and tag
+the amphora image. End state: octavia active/idle with an ACTIVE amphora image
+whose tag matches `octavia amp-image-tag`. (The end-to-end LB build + round-robin +
+failover validation is D-011 criterion 4, run in phase-08 once tenant scaffolding
+exists.)
+
+Decisions: D-021 (amphora pipeline; control-plane then image). Troubleshooting:
+appendix-A -- L7 (snap cannot read /tmp), octavia-configure-resources (long-running
+/ o-hm0 transient), amp-image-tag-mismatch (LP#1937003).
+
+IP-ONLY NOTE (supersedes the 2026-05-30 octavia capture): the 05-30 `/etc/hosts`
+FQDN prereq DOES NOT APPLY here. This deploy is IP-only (R18 catalog) and octavia
+is multi-homed (reaches the provider VIPs over its eth1), so `configure-resources`
+needs no hosts/FQDN prep -- just the action.
+
+---
+
+## Prerequisites (must be true entering phase-05)
+- phase-03/04 (core verify + network carve) done; vault cert cascade complete so `octavia:certificates`
+  is satisfied. octavia/0 sits BLOCKED "Awaiting ... configure-resources" (the
+  expected post-deploy state, D-021) -- this phase clears it.
+- Glance reachable from the jumphost (provider VIP) to seed the amphora base.
+- Bundle-baked octavia config (verify in 5.2 gate): `octavia-diskimage-retrofit`
+  `use-internal-endpoints=true` + `image-format=raw` + `amp-image-tag=octavia-amphora`,
+  and `octavia amp-image-tag=octavia-amphora` (the two MUST match -- LP#1937003).
+
+## Constants and env-literals (TAG: confirm per site on rebuild)
+- `ENV(octavia-tag)`   octavia-amphora   (octavia + retrofit amp-image-tag; must match)
+- `ENV(base-name)`     jammy-amphora-base (uploaded base; NOT amphora-tagged)
+- `ENV(retrofit)`      octavia-diskimage-retrofit
+- `ENV(internal-glance-vip)` 10.12.8.53  (retrofit is metal-only 10.12.8.x -> internal glance)
+- run-specific: base image id, amphora image id, op/task ids (capture at run).
+
+## Run-location legend
+- `# RUN: jumphost` -- vopenstack-jesse as jessea123, admin-openrc sourced; all
+  octavia work is `juju run` / `juju config` / `openstack` from the jumphost.
+
+---
+
+## Step 5.1 -- configure-resources (D-021 Phase 1; control plane + lb-mgmt overlay)
+`# RUN: jumphost`  Read-only pre-check, then the argument-free action with a bound
+wait, then authoritative completion via show-operation (NOT the streamed log).
+
+```bash
+( {   # read-only pre-check; run in a subshell so the sourced admin-openrc variables do not persist in the calling shell
+  source ~/admin-openrc
+  echo "=== pre-check (verify-before-mutate) ==="
+  juju status octavia -m openstack | grep -E 'octavia/0' || true   # expect BLOCKED Awaiting configure-resources
+  juju actions octavia -m openstack | grep -i configure-resources  # action exists; takes NO params
+  echo "--- idempotency: charm-octavia-tagged resources should be EMPTY pre-run ---"
+  openstack network list --tags charm-octavia -f value -c Name        # expect empty
+  openstack security group list --tags charm-octavia -f value -c Name # expect empty
+  openstack loadbalancer provider list                                # expect amphora present (API reachable)
+} )
+```
+Run the action (long-running; juju's default wait may time out but the hook keeps
+going -- use a bound `--wait` and tee; do NOT re-fire on a wait-timeout -- appendix-A:
+octavia-configure-resources):
+```bash
+juju run octavia/leader configure-resources -m openstack --wait=20m 2>&1 | tee ~/octavia-configure-resources.out
+```
+Authoritative completion + A/B/C verify:
+```bash
+( {   # post-action verify; no set -e here, so every section prints even if an earlier check fails
+  source ~/admin-openrc
+  echo "=== 0. authoritative status (use show-operation, not the streamed log) ==="
+  # juju show-operation <N>  -> operation status: completed AND its task: completed   (manual step: left commented, so section 0 prints only its header)
+  echo "=== A. octavia active/idle 'Unit is ready' (blocked cleared) ==="
+  juju status octavia -m openstack | grep -E 'octavia/0'
+  echo "=== B. resources created by the action (were empty pre-run) ==="
+  openstack network list --tags charm-octavia -f value -c Name        # lb-mgmt-net
+  openstack subnet list  --tags charm-octavia -f value -c Name        # lb-mgmt-subnetv6 (IPv6 geneve design)
+  openstack security group list --tags charm-octavia -f value -c Name # lb-mgmt-sec-grp
+  echo "=== C. o-hm0 up (IPv6-ULA on lb-mgmt prefix; a br-int port) ==="
+  juju exec --unit octavia/0 -m openstack -- 'ip -br addr show o-hm0; sudo ovs-vsctl get Interface o-hm0 external_ids' </dev/null
+} )
+```
+GATE: octavia/0 active/idle; lb-mgmt-net + lb-mgmt-subnetv6 + lb-mgmt-sec-grp present;
+o-hm0 has an IPv6-ULA addr (within fc00::/7) and is a br-int port. (NORMAL, not faults: the
+lb-mgmt-net is IPv6-ULA by design; a "Virtual network ... down" transient during
+o-hm0 bring-up self-heals; the lb-mgmt `network:distributed` port shows DOWN.)
+
+## Step 5.2 -- Amphora image pipeline (D-021 Phase 2; canonical block)
+`# RUN: jumphost`  This is the verified canonical block (06-03). One `( set -e )`
+subshell: config GATE -> idempotent seed (base staged in `$HOME`, NOT /tmp -- the
+openstack snap cannot read /tmp, appendix-A: L7) -> retrofit build -> confirm.
+Fully idempotent (amphora present -> skip to confirm; base present -> retrofit only;
+fresh -> download+checksum+upload+retrofit). For a FIRST live run in a new
+environment you may stop after the seed to eyeball before the multi-minute build.
+
+```bash
+# Tunables (operator-confirm the first two for your environment):
+BASE_IMG_URL="https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img"
+BASE_SUM_URL="https://cloud-images.ubuntu.com/jammy/current/SHA256SUMS"
+BASE_IMG_FILE="jammy-server-cloudimg-amd64.img"
+BASE_NAME="jammy-amphora-base"                     # ENV(base-name); NOT amphora-tagged (only the retrofit OUTPUT is)
+VERSION_NAME="$(date -u +%Y%m%d)"                  # cosmetic (D-021): feeds the retrofit OUTPUT name
+PRODUCT_NAME="com.ubuntu.cloud:server:22.04:amd64" # cosmetic (D-021): metadata only
+RETRO=octavia-diskimage-retrofit                   # ENV(retrofit)
+STAGE="$HOME/amphora-base"                          # snap-READABLE (home iface); NOT /tmp (L7)
+
+( set -e
+  source ~/admin-openrc
+
+  # ---- Phase 0: config GATE (abort if the cloud is not in the expected state) ----
+  UIE=$(juju config $RETRO use-internal-endpoints)
+  IMGFMT=$(juju config $RETRO image-format)
+  RTAG=$(juju config $RETRO amp-image-tag)
+  OTAG=$(juju config octavia amp-image-tag)
+  [ "$UIE" = true ]   || { echo "GATE FAIL: $RETRO use-internal-endpoints=$UIE (need true; retrofit is metal-only)"; exit 1; }
+  [ "$IMGFMT" = raw ] || { echo "GATE FAIL: $RETRO image-format=$IMGFMT (need raw; Ceph RBD fast-clone)"; exit 1; }
+  [ -n "$RTAG" ] && [ "$RTAG" = "$OTAG" ] || { echo "GATE FAIL: amp-image-tag mismatch retrofit='$RTAG' octavia='$OTAG' (LP#1937003)"; exit 1; }
+  echo "[OK] config gate: use-internal-endpoints=true image-format=raw amp-image-tag=$OTAG"
+
+  # ---- Phase 1: idempotency + seed the jammy base (only if no amphora AND no base) ----
+  AMPH=$(openstack image list --tag "$OTAG" -f value -c ID | head -1)
+  if [ -n "$AMPH" ]; then
+    echo "[SKIP] image already tagged $OTAG ($AMPH) -- pipeline complete; jumping to confirm"
+  else
+    BASE_ID=$(openstack image list --name "$BASE_NAME" -f value -c ID | head -1)
+    if [ -z "$BASE_ID" ]; then
+      mkdir -p "$STAGE"; LOCAL="$STAGE/$BASE_IMG_FILE"
+      EXP=$(curl -fsSL "$BASE_SUM_URL" | awk -v f="$BASE_IMG_FILE" '$2=="*"f || $2==f {print $1}')
+      [ -n "$EXP" ] || { echo "GATE FAIL: no published checksum for $BASE_IMG_FILE"; exit 1; }
+      if [ -f "$LOCAL" ] && [ "$(sha256sum "$LOCAL" | awk '{print $1}')" = "$EXP" ]; then
+        echo "[OK] staged base present + checksum-valid; skipping download"
+      else
+        echo "[..] downloading jammy base to $LOCAL (snap-readable; NOT /tmp)"
+        wget -q -O "$LOCAL" "$BASE_IMG_URL"
+        GOT=$(sha256sum "$LOCAL" | awk '{print $1}')
+        [ "$EXP" = "$GOT" ] || { echo "GATE FAIL: checksum mismatch exp='$EXP' got='$GOT'"; exit 1; }
+        echo "[OK] checksum verified ($GOT)"
+      fi
+      echo "[..] uploading base to glance (qcow2; 5 retrofit props; NO amphora tag on the base)"
+      BASE_ID=$(openstack image create "$BASE_NAME" \
+        --file "$LOCAL" --disk-format qcow2 --container-format bare \
+        --property architecture=x86_64 --property os_distro=ubuntu --property os_version=22.04 \
+        --property version_name="$VERSION_NAME" --property product_name="$PRODUCT_NAME" \
+        -f value -c id)
+    fi
+    [ -n "$BASE_ID" ] || { echo "GATE FAIL: base image id empty after seed"; exit 1; }
+    echo "[OK] base image: $BASE_ID"
+
+    # ---- Phase 2: retrofit (long-running build; bounded wait; tee the result) ----
+    echo "-- retrofit-image action schema (informational; confirm source-image is honored) --"
+    juju actions $RETRO --schema --format yaml 2>&1 | sed -n '/retrofit-image:/,/^[a-zA-Z]/p' | head -30 || true
+    echo "[..] running retrofit-image (multi-minute build)"
+    juju run $RETRO/leader retrofit-image source-image="$BASE_ID" --wait=30m 2>&1 | tee ~/retrofit-image.out
+  fi
+
+  # ---- Phase 3: confirm (amphora present + active + tagged == octavia's tag) ----
+  echo "=== CONFIRM: images tagged $OTAG ==="
+  openstack image list --tag "$OTAG" -f value -c ID -c Name -c Status
+  ACT=$(openstack image list --tag "$OTAG" -f value -c Status | grep -xc active || true)
+  [ "$ACT" -ge 1 ] || { echo "CONFIRM FAIL: no ACTIVE image tagged $OTAG"; exit 1; }
+  echo "[OK] amphora present + active + tagged $OTAG (matches octavia amp-image-tag) -- D-021 complete"
+)
+```
+GATE: an ACTIVE image tagged `octavia-amphora` whose tag matches `octavia amp-image-tag`.
+
+---
+
+## EXIT GATE (phase-05 complete)
+- octavia/0 active/idle; lb-mgmt-net / lb-mgmt-subnetv6 / lb-mgmt-sec-grp present; o-hm0 up.
+- An ACTIVE amphora image tagged `octavia-amphora`, tag matching `octavia amp-image-tag`.
+- Octavia is fully enabled (control plane + amphora). The end-to-end LB validation
+  (build -> listener -> pool -> health-monitor -> 2 members -> FIP; round-robin;
+  admin-scope failover) is D-011 criterion 4 -- run in phase-08 (needs tenant
+  scaffolding + the external provider network from phase-04).
+
+## As-built reference (2026-06-03 run -- audit trail)
+- octavia/0: octavia 14.0.0, charm rev 441 2024.1/stable, on 3/lxd/3, data leg 10.12.12.1;
+  multi-homed (reaches provider VIPs over eth1).
+- configure-resources op 15 / task 16 completed (--wait=20m). Created lb-mgmt-net
+  (d1ee4bca-...), lb-mgmt-subnetv6 (1c1f50df-..., IPv6 geneve), lb-mgmt-sec-grp (acbacb21-...).
+  o-hm0 fc00:9c49:5b4e:cf23:f816:3eff:fead:56df/64, br-int port.
+- amphora: retrofit is metal-only (10.12.8.172) -> internal glance VIP 10.12.8.53.
+  base jammy-amphora-base uploaded (f8b48cdb-...); retrofit op 19/task 20 built
+  amphora-haproxy-x86_64-ubuntu-22.04-20260603 (4e4a94ac-...), ACTIVE, tag octavia-amphora
+  (matches octavia amp-image-tag). image-format raw.
+- Charm gap (parked): glance-simplestreams-sync is metal-only and cannot reach glance
+  on a no-DNS deploy (use-internal-endpoints steers keystone auth but not the
+  glance/swift client) -> gss does NOT seed the base. The base is seeded manually
+  (above) and the amphora BUILD stays charm-native via the retrofit over internal
+  endpoints. Roosevelt root-fix: cloud DNS + FQDN-valid certs (also fixes gss).
+
+## Next
+phase-06 -- in-cloud management cluster (D-035).
diff --git a/runbooks/phase-06-incloud-mgmt-cluster.md b/runbooks/phase-06-incloud-mgmt-cluster.md
new file mode 100644
index 0000000..38dd469
--- /dev/null
+++ b/runbooks/phase-06-incloud-mgmt-cluster.md
@@ -0,0 +1,389 @@
+# Phase 06 -- In-Cloud Management Cluster (D-035)
+
+Stand up the CAPI/Magnum management cluster as a single-homed in-cloud tenant
+VM (`capi-mgmt-v2`), bootstrap k8s-snap on it, prove pod egress through the
+hard gate, and install the pinned CAPI provider stack. This is the persistent
+v1 management cluster -- there is NO clusterctl move/pivot.
+
+Decisions: D-035 (in-cloud single-homed tenant VM; retires D-033/D-017),
+D-034 (CAPI versions sourced from the capi-helm-charts tag's dependencies.json,
+never hardcoded), D-031 (Magnum + magnum-capi-helm + capi-helm-charts engine).
+Troubleshooting: appendix-A entries DOCFIX-021, DOCFIX-024, DOCFIX-025a, D-035.
+
+---
+
+## Prerequisites (must be true entering phase-06)
+- Charmed OpenStack live and verified (phase-03 done); Keystone reachable on the
+  provider VIP.
+- The external provider network exists (phase-04 done) -- the mgmt FIP in Step 6.2 is
+  allocated from it. Octavia is NOT required for the mgmt cluster itself (its apiserver
+  is reached via the FIP directly); Octavia is a phase-08 prereq for workload clusters.
+- `admin-openrc` sourced on the jumphost; `openstack`, `jq`, `kubectl` available.
+- The `capi-mgmt` Keystone project exists. The Magnum trustee domain is auto-configured
+  by the magnum charm via its keystone (identity-credentials) relation -- verify [trust]
+  (trustee_domain_id / trustee_domain_admin_id / trustee_domain_admin_password) is
+  populated in magnum.conf; no manual step.
+- No `capi-mgmt-net` tenant network yet (this phase creates it).
+
+## Constants and env-literals (TAG: regenerate/confirm per site on rebuild)
+Literals below are tagged `ENV(...)` so the later generalization pass is
+mechanical. Discover everything else dynamically at run time.
+- `ENV(project)`     capi-mgmt           (id 674171fd28d446d3a37073b6a761e910)
+- `ENV(ext-net)`     provider-ext        (id 70b34bb2-3afb-4b43-96d3-f520dbcbf9a8)
+- `ENV(image)`       ubuntu-24.04-noble  (id c66342ce-f402-4e6e-a324-ae27032396d7)
+- `ENV(flavor)`      gp.large            (16384 MB / 4 vCPU / 80 GB)
+- `ENV(mgmt-cidr)`   10.20.0.0/24        (capi-mgmt-subnet; overlay, non-IPAM)
+- `ENV(keystone-vip)` 10.12.4.50:5000    (the gate target -- the deployed VIP)
+- `ENV(mgmt-fip)`    10.12.7.40          (assigned in 6.2; apiserver SAN)
+- `ENV(pod-cidr)`    10.1.0.0/16   `ENV(svc-cidr)` 10.152.183.0/24  (snap defaults; non-colliding)
+- `ENV(capi-tag)`    0.25.1              (capi-helm-charts release; dependencies.json source)
+
+## Run-location legend (every block states where it runs)
+- `# RUN: jumphost`   -- on vopenstack-jesse as jessea123, admin-openrc sourced.
+- `# RUN: mgmt VM`    -- shipped to the VM over SSH via the FIP (heredoc below).
+- VM SSH form (used verbatim throughout; DOCFIX-021 `</dev/null` on every sudo):
+  `ssh -i ~/.ssh/id_ed25519 -o BatchMode=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 ubuntu@10.12.7.40 bash -s <<'REOF' ... REOF`
+
+---
+
+## Step 6.0 -- Keypair + security group (capi-mgmt project)
+`# RUN: jumphost`  Safe/idempotent setup -- consolidated. (LIVE-REVIEW: exact
+SG rule syntax is standard openstack-client; confirm on the redeploy test.)
+
+```bash
+( {
+  set -u                                           # unset variables are errors; command failures are handled per line
+  PROJ=capi-mgmt                                   # ENV(project)
+  echo "=== keypair (import the jumphost pubkey) ==="
+  openstack keypair show capi-mgmt-key >/dev/null 2>&1 \
+    || openstack keypair create --public-key ~/.ssh/id_ed25519.pub capi-mgmt-key
+  echo "=== security group capi-mgmt-sg (ingress 22 + 6443; egress default-allow) ==="
+  openstack security group show capi-mgmt-sg >/dev/null 2>&1 \
+    || openstack security group create --project "$PROJ" capi-mgmt-sg
+  SG=$(openstack security group show capi-mgmt-sg -f value -c id) || { echo "FAIL: cannot resolve capi-mgmt-sg" >&2; exit 1; }
+  # add rules only if absent (re-run safe); stop above rather than list/create rules against an empty SG id
+  openstack security group rule list "$SG" -f value -c "Port Range" | grep -q '^22:22' \
+    || openstack security group rule create --proto tcp --dst-port 22   "$SG"
+  openstack security group rule list "$SG" -f value -c "Port Range" | grep -q '^6443:6443' \
+    || openstack security group rule create --proto tcp --dst-port 6443 "$SG"
+  echo "=== verify ==="
+  openstack security group rule list "$SG" -f value -c "IP Protocol" -c "Port Range"   # the list column is named "IP Protocol"
+} )
+```
+Expect: `capi-mgmt-key` present; `capi-mgmt-sg` with tcp/22 and tcp/6443 ingress.
+
+## Step 6.1 -- Network, subnet, router (capi-mgmt project)
+`# RUN: jumphost`  Idempotent network plumbing -- consolidated. DNS nameservers
+1.1.1.1/1.0.0.1 (D-019: public resolvers; image pulls need internet egress).
+
+```bash
+( {
+  set -u                                           # unset variables are errors; each resource is show-or-create
+  PROJ=capi-mgmt                                   # ENV(project)
+  EXT=provider-ext                                 # ENV(ext-net)
+  echo "=== network capi-mgmt-net ==="
+  openstack network show capi-mgmt-net >/dev/null 2>&1 \
+    || openstack network create --project "$PROJ" capi-mgmt-net
+  echo "=== subnet capi-mgmt-subnet 10.20.0.0/24 ==="   # ENV(mgmt-cidr)
+  openstack subnet show capi-mgmt-subnet >/dev/null 2>&1 \
+    || openstack subnet create --project "$PROJ" --network capi-mgmt-net \
+         --subnet-range 10.20.0.0/24 \
+         --dns-nameserver 1.1.1.1 --dns-nameserver 1.0.0.1 capi-mgmt-subnet
+  echo "=== router capi-mgmt-router + ext-gw + subnet ==="
+  openstack router show capi-mgmt-router >/dev/null 2>&1 \
+    || openstack router create --project "$PROJ" capi-mgmt-router
+  openstack router set --external-gateway "$EXT" capi-mgmt-router   # run on every pass, not only on create
+  openstack router add subnet capi-mgmt-router capi-mgmt-subnet 2>/dev/null || true   # errors discarded so a re-run passes; this also hides real failures
+  echo "=== verify ==="
+  openstack router show capi-mgmt-router -f value -c external_gateway_info -c status   # the only check of the two router calls above
+} )
+```
+Expect: subnet `10.20.0.0/24`; router `ACTIVE` with an external gateway on provider-ext.
+
+## Step 6.2 -- VM + floating IP (MUTATION; not batched with the gate)
+`# RUN: jumphost`  Creates the VM and pins the management FIP. The FIP is the
+stable apiserver endpoint for the jumphost AND the Magnum conductor.
+
+```bash
+( {
+  set -u                                           # unset variables are errors
+  PROJ=capi-mgmt                                   # ENV(project)
+  EXT=provider-ext                                 # ENV(ext-net)
+  echo "=== create capi-mgmt-v2 (gp.large / ubuntu-24.04-noble) ==="
+  openstack server show capi-mgmt-v2 >/dev/null 2>&1 \
+    || openstack server create --wait --image ubuntu-24.04-noble --flavor gp.large \
+         --network capi-mgmt-net --security-group capi-mgmt-sg \
+         --key-name capi-mgmt-key capi-mgmt-v2
+  echo "=== gate: VM must be ACTIVE before any FIP is allocated (re-run is safe) ==="
+  ST=$(openstack server show capi-mgmt-v2 -f value -c status); [ "$ST" = ACTIVE ] || { echo "GATE FAIL: capi-mgmt-v2 status=$ST (need ACTIVE); re-run this block" >&2; exit 1; }
+  echo "=== floating ip on provider-ext, associate to the VM (reuse the one already attached) ==="
+  FIP=$(openstack floating ip list --port "$(openstack port list --server capi-mgmt-v2 -f value -c ID | head -1)" -f value -c "Floating IP Address" | head -1)
+  [ -n "$FIP" ] || { FIP=$(openstack floating ip create "$EXT" -f value -c floating_ip_address) && openstack server add floating ip capi-mgmt-v2 "$FIP"; }   # allocate only when none is attached, so re-runs do not leak FIPs
+  echo "mgmt FIP=$FIP   # expect this to be 10.12.7.40 on a clean run -- ENV(mgmt-fip)"
+  openstack server show capi-mgmt-v2 -f value -c addresses
+} )
+```
+Note: the tenant IP lands on `10.20.0.45` and the FIP on `10.12.7.40` on the
+as-built run. If the FIP differs on rebuild, carry the new value into 6.4
+(`extra-sans`) and 6.5 (kubeconfig server) and phase-07 (conductor kubeconfig).
+
+## Step 6.3 -- GATE 1: OS-level egress (before any k8s investment)
+`# RUN: mgmt VM`  This is the premise of D-035. PROCEED ONLY IF VIP-OK.
+
+```bash
+ssh -i ~/.ssh/id_ed25519 -o BatchMode=yes -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 ubuntu@10.12.7.40 bash -s <<'REOF'
+set -u   # no -e: both probes always run and each prints its own OK/FAIL token
+echo "=== VM -> Keystone VIP 10.12.4.50:5000 ==="            # ENV(keystone-vip)
+timeout 6 bash -c 'exec 3<>/dev/tcp/10.12.4.50/5000' && echo VIP-OK || echo VIP-FAIL   # TCP connect only; the result is the printed token, not the exit status
+echo "=== VM -> internet 1.1.1.1:443 (image pulls) ==="
+timeout 6 bash -c 'exec 3<>/dev/tcp/1.1.1.1/443' && echo NET-OK || echo NET-FAIL   # same probe form against 1.1.1.1:443
+REOF
+```
+GATE: require `VIP-OK`. `NET-FAIL` means sort provider-ext internet egress (or a
+registry mirror) before 6.6. Do NOT build k8s on a VM that fails VIP-OK.
+(appendix-A: D-035 -- single-NIC removes the dual-homed reverse-path bug.)
+
+## Step 6.4 -- k8s-snap install + bootstrap (MUTATION; secret-free)
+`# RUN: mgmt VM`  Channel is `1.32-classic/stable` (NOT `1.32/stable` -- that is
+the charm-era track and does not exist for the snap). The bootstrap config MUST
+carry an explicit `cluster-config` block (appendix-A: DOCFIX-024 -- a config
+without it disables network+dns and the node never goes Ready). Every `sudo`
+gets `</dev/null` (appendix-A: DOCFIX-021 -- remote `bash -s` reads the script
+from stdin).
+
+```bash
+ssh -i ~/.ssh/id_ed25519 -o BatchMode=yes -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 ubuntu@10.12.7.40 bash -s <<'REOF'
+set -euo pipefail
+
+echo "=== install k8s snap 1.32-classic/stable ==="
+sudo snap install k8s --classic --channel=1.32-classic/stable </dev/null
+
+echo "=== write bootstrap config (DOCFIX-024: cluster-config block REQUIRED) ==="
+sudo tee /root/bootstrap-config.yaml >/dev/null <<'CFG'
+cluster-config:
+  network:
+    enabled: true
+  dns:
+    enabled: true
+pod-cidr: 10.1.0.0/16
+service-cidr: 10.152.183.0/24
+extra-sans:
+- 10.12.7.40
+- 10.20.0.45
+CFG
+sudo cat /root/bootstrap-config.yaml </dev/null   # DOCFIX-021: every sudo gets </dev/null (the tee above is the exception: its stdin is the CFG heredoc)
+
+echo "=== bootstrap (timeout 10m) ==="
+sudo k8s bootstrap --name capi-mgmt-v2 --file /root/bootstrap-config.yaml --timeout 10m </dev/null
+
+echo "=== status ==="
+sudo k8s status --wait-ready --timeout 5m </dev/null
+REOF
+```
+Expect: `k8s status` reports cluster ready, network+dns enabled, one node.
+Retry path: `sudo snap remove k8s --purge </dev/null` then re-run this block.
+
+## Step 6.5 -- GATE 2: kubeconfig to jumphost + pod-egress proof (THE D-035 GATE)
+The agnhost pod-egress probe is the exact test the dual-homed D-033 node and the
+old k3s node FAILED. On this single-NIC VM the probe pod must reach `Completed`.
+
+```bash
+# RUN: mgmt VM -- emit a jumphost-facing kubeconfig (server = the FIP, not tenant IP)
+( umask 077; ssh -i ~/.ssh/id_ed25519 -o BatchMode=yes -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 ubuntu@10.12.7.40 \
+    "sudo k8s config server=https://10.12.7.40:6443 </dev/null" > ~/capi-mgmt.kubeconfig; chmod 600 ~/capi-mgmt.kubeconfig )
+# [SENSITIVE] ~/capi-mgmt.kubeconfig contains a cluster-admin credential; umask 077 (new file) + chmod 600 (existing file) keep it owner-only.
+wc -l ~/capi-mgmt.kubeconfig ; head -1 ~/capi-mgmt.kubeconfig   # expect >0 lines, "apiVersion: v1"
+```
+
+```bash
+# RUN: jumphost -- node check + the hard gate
+( {
+  set -u                                             # unset variables are errors
+  export KUBECONFIG="$HOME/capi-mgmt.kubeconfig"
+  echo "=== node ==="
+  kubectl get nodes -o wide                          # expect capi-mgmt-v2 Ready, v1.32.13
+  echo "=== agnhost pod-egress probe -> Keystone VIP 10.12.4.50:5000 ==="
+  kubectl run egress-test --image=registry.k8s.io/e2e-test-images/agnhost:2.40 \
+    --restart=Never --command -- /agnhost connect 10.12.4.50:5000 --timeout=5s
+  echo "(poll the next line until phase=Succeeded with terminated exitCode 0 -- shown as STATUS=Completed by 'kubectl get pod')"
+  kubectl get pod egress-test -o jsonpath='{.status.phase} {.status.containerStatuses[0].state}{"\n"}'   # prints the pod phase, then the container state object
+} )
+```
+GATE: require the probe pod `Completed` / `exitCode 0` (empty logs = clean TCP
+connect). That proves pod -> Cilium -> ens3 -> OVN -> router SNAT egress works.
+Then clean up the throwaway pod:
+
+```bash
+# RUN: jumphost -- delete the GATE 2 probe pod; KUBECONFIG is set for this one command only
+KUBECONFIG="$HOME/capi-mgmt.kubeconfig" kubectl delete pod egress-test --now
+```
+
+## Step 6.6 -- CAPI provider stack (pinned to dependencies.json; D-034)
+`# RUN: mgmt VM`  Run VM-side as `ubuntu` against the kubeconfig that 6.6a writes to
+`~/.kube/config` (local apiserver 10.20.0.45:6443), so the matched 1.32.13 kubectl is
+used -- avoids the jumphost kubectl's +3-minor skew. Versions are READ from the tag's
+dependencies.json, never hardcoded (D-034). The as-built pins are in the
+reference block below as a known-good cross-check only.
+
+HARDENED ORDER (appendix-A: D-034 install-ordering): cert-manager -> ORC ->
+`clusterctl init` -> CAAPH -> janitor. ORC precedes `clusterctl init` because
+CAPO v0.14.4's `openstackserver` controller hard-depends on ORC's
+`Image.openstack.k-orc.cloud` CRD; installing CAPO first crash-loops until ORC
+lands. (The 2026-06-08 run used ORC last and self-healed after 6 restarts -- the
+runbook corrects the order.)
+
+### 6.6a -- tooling + pins (install helm/clusterctl/kubectl VM-side; read dependencies.json @ 0.25.1)
+`# RUN: jumphost`  Installs the CAPI tooling on the mgmt VM at the dependencies.json
+pins and writes `~/capi-pins.env` (sourced by 6.6b-6.6e; 6.6f only verifies). kubectl is pinned to the
+cluster's 1.32.13 (no apiserver skew). The `SSH_OPTS`/`MGMT_VM` vars set here are reused
+by 6.6b-6.6f (same jumphost shell).
+```bash
+# define the mgmt-VM connection once (reused by 6.6b-6.6f)
+MGMT_VM=10.12.7.40
+SSH_OPTS="-i $HOME/.ssh/id_ed25519 -o BatchMode=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10"
+
+ssh $SSH_OPTS ubuntu@"$MGMT_VM" bash -s <<'REOF'
+set -euo pipefail
+sudo apt-get update -qq </dev/null && sudo apt-get install -y jq curl </dev/null
+
+# kubeconfig for the local apiserver (10.20.0.45:6443), readable by ubuntu -> helm/clusterctl/kubectl need no sudo
+mkdir -p "$HOME/.kube"; sudo k8s config </dev/null > "$HOME/.kube/config"; chmod 600 "$HOME/.kube/config"
+
+# egress pre-check (the VM pulls charts/binaries/manifests from these)
+for h in https://raw.githubusercontent.com https://get.helm.sh https://github.com https://dl.k8s.io; do
+  printf '%s -> ' "$h"; curl -s -o /dev/null -w '%{http_code}\n' "$h" || echo FAIL
+done
+
+# version constellation from the chart tag's dependencies.json (D-034; never hardcoded)
+curl -fsSL https://raw.githubusercontent.com/azimuth-cloud/capi-helm-charts/0.25.1/dependencies.json -o "$HOME/deps.json"
+CAPI=$(jq -r '."cluster-api"' "$HOME/deps.json")
+CAPO=$(jq -r '."cluster-api-provider-openstack"' "$HOME/deps.json")
+CERT=$(jq -r '."cert-manager"' "$HOME/deps.json")
+ORC=$(jq -r '."openstack-resource-controller"' "$HOME/deps.json")
+CAAPH=$(jq -r '."addon-provider"' "$HOME/deps.json")
+JANITOR=$(jq -r '."cluster-api-janitor-openstack"' "$HOME/deps.json")
+HELM=$(jq -r '.helm' "$HOME/deps.json")
+{ echo "CAPI=$CAPI"; echo "CAPO=$CAPO"; echo "CERT=$CERT"; echo "ORC=$ORC"; \
+  echo "CAAPH=$CAAPH"; echo "JANITOR=$JANITOR"; echo "HELM=$HELM"; } > "$HOME/capi-pins.env"
+echo "== pins (cross-check: CAPI v1.13.2 CAPO v0.14.4 CERT v1.20.2 ORC v2.5.0 CAAPH 0.12.0 JANITOR 0.11.0 HELM v3.17.3) =="
+cat "$HOME/capi-pins.env"; if grep -Eq '=(null)?$' "$HOME/capi-pins.env"; then echo "GATE FAIL: a pin is missing from dependencies.json @ 0.25.1 (null/empty above)" >&2; exit 1; fi   # jq -r prints the literal null for an absent key
+
+# install helm (pinned), clusterctl (= CAPI pin), kubectl (= cluster 1.32.13)
+curl -fsSL "https://get.helm.sh/helm-${HELM}-linux-amd64.tar.gz" -o /tmp/helm.tgz
+sudo tar -xzf /tmp/helm.tgz -C /usr/local/bin --strip-components=1 linux-amd64/helm </dev/null
+curl -fsSL "https://github.com/kubernetes-sigs/cluster-api/releases/download/${CAPI}/clusterctl-linux-amd64" -o /tmp/clusterctl
+sudo install -m 0755 /tmp/clusterctl /usr/local/bin/clusterctl </dev/null
+curl -fsSL "https://dl.k8s.io/release/v1.32.13/bin/linux/amd64/kubectl" -o /tmp/kubectl
+sudo install -m 0755 /tmp/kubectl /usr/local/bin/kubectl </dev/null
+
+echo "== tooling =="; helm version --short; clusterctl version; kubectl version --client 2>/dev/null | head -1
+REOF
+```
+
+### 6.6b -- cert-manager (DOCFIX-025a: crds.enabled=true, NOT installCRDs)
+`# RUN: jumphost`
+```bash
+ssh $SSH_OPTS ubuntu@"$MGMT_VM" bash -s <<'REOF'
+set -euo pipefail
+source "$HOME/capi-pins.env"   # provides $CERT (file written by 6.6a)
+helm repo add jetstack https://charts.jetstack.io
+helm repo update
+helm upgrade --install cert-manager jetstack/cert-manager \
+  --namespace cert-manager --create-namespace \
+  --version "$CERT" --set crds.enabled=true --wait --timeout 5m
+kubectl -n cert-manager wait --for=condition=Available deploy --all --timeout=180s   # under set -e a timeout here aborts the block
+kubectl -n cert-manager get pods
+REOF
+```
+
+### 6.6c -- ORC (BEFORE clusterctl init; CAPO hard-depends on the ORC Image CRD)
+`# RUN: jumphost`  server-side apply (large CRDs). Manifest is the k-orc release
+`install.yaml` (D-034).
+```bash
+ssh $SSH_OPTS ubuntu@"$MGMT_VM" bash -s <<'REOF'
+set -euo pipefail
+source "$HOME/capi-pins.env"   # provides $ORC (file written by 6.6a)
+kubectl apply --server-side -f \
+  "https://github.com/k-orc/openstack-resource-controller/releases/download/${ORC}/install.yaml"
+kubectl -n orc-system wait --for=condition=Available deploy --all --timeout=180s
+kubectl get crd images.openstack.k-orc.cloud   # the CRD CAPO depends on; under set -e its absence fails the block
+REOF
+```
+
+### 6.6d -- clusterctl init (core + kubeadm bootstrap/control-plane + CAPO)
+`# RUN: jumphost`  cert-manager already present -> clusterctl detects and skips it.
+```bash
+ssh $SSH_OPTS ubuntu@"$MGMT_VM" bash -s <<'REOF'
+set -euo pipefail
+source "$HOME/capi-pins.env"   # provides $CAPI and $CAPO (file written by 6.6a)
+clusterctl init \
+  --core "cluster-api:${CAPI}" \
+  --bootstrap "kubeadm:${CAPI}" \
+  --control-plane "kubeadm:${CAPI}" \
+  --infrastructure "openstack:${CAPO}"
+for ns in capi-system capi-kubeadm-bootstrap-system capi-kubeadm-control-plane-system capo-system; do   # one namespace per provider requested above
+  echo "== $ns =="; kubectl -n "$ns" wait --for=condition=Available deploy --all --timeout=240s
+done
+REOF
+```
+
+### 6.6e -- CAAPH + janitor (azimuth helm charts; chart names from each repo Chart.yaml)
+`# RUN: jumphost`
+```bash
+ssh $SSH_OPTS ubuntu@"$MGMT_VM" bash -s <<'REOF'
+set -euo pipefail
+source "$HOME/capi-pins.env"   # provides $CAAPH and $JANITOR (file written by 6.6a)
+helm repo add capi-addon   https://azimuth-cloud.github.io/cluster-api-addon-provider
+helm repo add capi-janitor https://azimuth-cloud.github.io/cluster-api-janitor-openstack
+helm repo update
+helm upgrade --install cluster-api-addon-provider capi-addon/cluster-api-addon-provider \
+  --namespace capi-addon-system --create-namespace --version "$CAAPH" --wait --timeout 5m
+helm upgrade --install cluster-api-janitor-openstack capi-janitor/cluster-api-janitor-openstack \
+  --namespace capi-janitor-system --create-namespace --version "$JANITOR" --wait --timeout 5m
+kubectl -n capi-addon-system   get pods   # informational listing; readiness was already enforced by helm --wait
+kubectl -n capi-janitor-system get pods
+REOF
+```
+
+### 6.6f -- verify the stack
+`# RUN: jumphost`
+```bash
+ssh $SSH_OPTS ubuntu@"$MGMT_VM" bash -s <<'REOF'
+set -euo pipefail
+clusterctl version
+echo "== all controllers Running =="
+kubectl get pods -A | grep -E 'capi-|capo-|cert-manager|orc-system|janitor|addon' || true   # grep -E replaces obsolescent egrep; || true keeps a no-match from aborting under set -e
+echo "== key CRDs present =="
+kubectl get crd clusters.cluster.x-k8s.io \
+  openstackclusters.infrastructure.cluster.x-k8s.io \
+  kubeadmcontrolplanes.controlplane.cluster.x-k8s.io \
+  images.openstack.k-orc.cloud
+REOF
+```
+
+---
+
+## EXIT GATE (phase-06 complete)
+- GATE 1 VIP-OK and GATE 2 agnhost `Completed` both passed.
+- `capi-mgmt-v2` Ready (v1.32.13); `~/capi-mgmt.kubeconfig` (server = FIP) works from the jumphost.
+- All CAPI controllers Running; ORC `Image` CRD present; no crash-looping CAPO.
+- Proceed to phase-07 (conductor graft).
+
+## As-built reference (2026-06-08/09 run -- audit trail; values are run-specific)
+- VM `capi-mgmt-v2`: gp.large, ubuntu-24.04-noble; tenant IP 10.20.0.45 (ens3); FIP 10.12.7.40.
+- Net `capi-mgmt-net` / subnet `capi-mgmt-subnet` 10.20.0.0/24; router `capi-mgmt-router`.
+- k8s-snap: 1.32-classic/stable, rev 5326, v1.32.13 (classic confinement); CNI Cilium 1.17.12-ck0.
+- pod CIDR 10.1.0.0/16; svc CIDR 10.152.183.0/24; cluster DNS 10.152.183.31.
+- GATE 2: probe pod 10.1.0.150 -> 10.12.4.50:5000, exitCode 0 / Completed (agnhost:2.40, ~9s pull).
+- Pins (capi-helm-charts 0.25.1 dependencies.json): CAPI v1.13.2 | CAPO v0.14.4 |
+  cert-manager v1.20.2 | CAAPH 0.12.0 | janitor 0.11.0 | ORC v2.5.0 | helm v3.17.3.
+  CAAPH/janitor deploy SHA-pinned images: 62f7c00 / d527847.
+- Tooling VM-side: helm v3.17.3, clusterctl v1.13.2, matched kubectl 1.32.13 (KUBECONFIG=/root/kubeconfig).
+
+## Next
+phase-07 -- conductor graft: place ~/capi-mgmt.kubeconfig at /etc/magnum/kubeconfig
+on magnum/0 and stage the [capi_helm] conf.d drop-in (D-037), pointing the
+conductor at the FIP.
diff --git a/runbooks/phase-07-conductor-graft.md b/runbooks/phase-07-conductor-graft.md
new file mode 100644
index 0000000..d36a162
--- /dev/null
+++ b/runbooks/phase-07-conductor-graft.md
@@ -0,0 +1,328 @@
+# Phase 07 -- Magnum Conductor Graft (D-031 / D-037 / D-042)
+
+Graft the magnum-capi-helm CAPI driver onto the charm-managed conductor
+(`magnum/0`), point it at the in-cloud management cluster (phase-06) via the
+FIP, and land on a CONTRACT-COHERENT driver so `coe cluster` health reports
+`HEALTHY`. The driver upgrade (D-042) is part of the v1 baseline here, not a
+follow-up -- the as-first-built 1.3.0 read the version-less v1beta2
+`infrastructureRef` and reported a cosmetic UNHEALTHY; it is superseded by the
+RELEASED `magnum-capi-helm==1.4.0`, which is the v1 end state.
+
+Decisions: D-031 (driver/engine/surface), D-037 (conf.d drop-in + config-dir via
+/etc/default, NOT a systemd ExecStart drop-in), D-042 (driver must be
+contract-coherent with the Layer-A core; amends D-034). D-036 (driver/engine/
+chart coherence). Troubleshooting: appendix-A DOCFIX-021, D-037, D-042, and
+lessons L-P6-1..4.
+
+---
+
+## Prerequisites (must be true entering phase-07)
+- phase-06 EXIT GATE passed: `capi-mgmt-v2` Ready, CAPI stack up (ORC `Image` CRD
+  present, no crash-looping CAPO), `~/capi-mgmt.kubeconfig` (server = FIP) works
+  from the jumphost.
+- Magnum charm live (`magnum/0`); the Keystone trustee domain is auto-configured by the
+  magnum charm via its keystone (identity-credentials) relation -- verify [trust]
+  (trustee_domain_id / trustee_domain_admin_id / trustee_domain_admin_password) is
+  populated in magnum.conf; no manual step.
+- `admin-openrc` on the jumphost; `juju` (model openstack); `jq`.
+
+## Constants and env-literals (TAG: confirm per site on rebuild)
+- `ENV(conductor-unit)` magnum/0        (LXD 1/lxd/2 on openstack1; addr 10.12.4.76)
+- `ENV(conductor-src)`  10.12.4.76/32   (the conductor's provider IP; SG source)
+- `ENV(mgmt-fip)`       10.12.7.40       (mgmt apiserver; kubeconfig server)
+- `ENV(mgmt-sg)`        capi-mgmt-sg     (in the capi-mgmt project)
+- `ENV(project)`        capi-mgmt        (id 674171fd28d446d3a37073b6a761e910)
+- `ENV(magnum-ns)`      magnum-674171fd28d446d3a37073b6a761e910  (driver namespace per project)
+- `ENV(chart-ver)`      0.25.1           (capi-helm-charts; load-bearing -- driver default is 0.10.1)
+- `ENV(helm-ver)`       v3.17.3
+
+## Run-location legend
+- `# RUN: jumphost`            -- vopenstack-jesse as jessea123 (admin-openrc).
+- `# RUN: jumphost -> magnum/0`-- shipped to the conductor via `juju ssh -m openstack magnum/0 '...' </dev/null`
+  (DOCFIX-021: `</dev/null` on every juju ssh / sudo so the remote command does not eat the heredoc/pipe).
+- Conductor facts: DEB install (magnum 18.0.1, python3.10, container base ubuntu 22.04);
+  conductor runs as user `magnum`; daemon launched by an LSB init script wrapped by
+  systemd `systemd-start` (NOT a direct ExecStart) -- see Step 7.7.
+
+---
+
+## Step 7.1 -- Authorize the conductor source on the mgmt-cluster SG
+`# RUN: jumphost` (scoped to the capi-mgmt project). Idempotent.
+
+```bash
+( {
+  set -u
+  # scope openstack CLI to the capi-mgmt project (id form -- robust to name/domain)
+  source ~/admin-openrc
+  unset OS_PROJECT_NAME OS_PROJECT_ID OS_TENANT_NAME OS_TENANT_ID
+  export OS_PROJECT_ID=674171fd28d446d3a37073b6a761e910      # ENV(project)
+  SG=$(openstack security group show capi-mgmt-sg -f value -c id) || { echo "FAIL: cannot resolve capi-mgmt-sg" >&2; exit 1; }   # ENV(mgmt-sg)
+  echo "SG=$SG"
+  echo "=== add ingress tcp/6443 from the conductor 10.12.4.76/32 (if absent) ==="
+  openstack security group rule list "$SG" -f value -c "IP Range" -c "Port Range" \
+    | grep -qF '10.12.4.76/32 6443:6443' \
+    || openstack security group rule create --proto tcp --dst-port 6443 \
+         --remote-ip 10.12.4.76/32 "$SG"
+  openstack security group rule list "$SG" -f value -c "IP Protocol" -c "Port Range" -c "IP Range"   # the list column is named "IP Protocol"
+} )
+```
+Then prove conductor -> mgmt apiserver reachability:
+```bash
+# RUN: jumphost -> magnum/0 -- TCP connect from the conductor to 10.12.7.40:6443; the result is the printed TCP-OK/TCP-FAIL token
+juju ssh -m openstack magnum/0 \
+  "timeout 6 bash -c 'exec 3<>/dev/tcp/10.12.7.40/6443' && echo TCP-OK || echo TCP-FAIL" </dev/null
+```
+GATE: require `TCP-OK`. (Pre-existing jumphost rules tcp/22+6443 from 10.12.4.1/32 remain.)
+
+## Step 7.2 -- Place the mgmt kubeconfig on the conductor [SENSITIVE; not batched]
+`# RUN: jumphost -> magnum/0`  The source `~/capi-mgmt.kubeconfig` already has its
+server rewritten to the FIP (phase-06 6.5). Transfer base64-piped straight into a
+root-written 0600 file owned by the conductor user -- never stage the admin
+kubeconfig in /tmp (appendix-A: L-P6-4).
+
+```bash
+# discover the conductor service user (expect: magnum -- the transfer below hard-codes that name in getent/chown)
+juju ssh -m openstack magnum/0 'systemctl show magnum-conductor -p User --value' </dev/null
+
+# transfer (umask 077; chown to the discovered user; 0600)
+# NOTE: NO trailing </dev/null here -- stdin IS the payload. A </dev/null would
+# override the pipe (SC2259) and silently write an EMPTY kubeconfig while the
+# && chain still exits 0. DOCFIX-021 applies only to commands whose stdin is
+# NOT in use; the discovery line above keeps it, this pipe must not.
+base64 ~/capi-mgmt.kubeconfig | juju ssh -m openstack magnum/0 \
+  "sudo bash -c 'umask 077; base64 -d > /etc/magnum/kubeconfig && \
+   getent passwd magnum >/dev/null && chown magnum: /etc/magnum/kubeconfig && \
+   chmod 0600 /etc/magnum/kubeconfig'"
+
+# verify byte-exact (hashes must match before proceeding): local file first, then the copy on magnum/0
+sha256sum ~/capi-mgmt.kubeconfig
+juju ssh -m openstack magnum/0 'sudo sha256sum /etc/magnum/kubeconfig' </dev/null
+```
+GATE: the two sha256 hashes are identical (an empty or truncated transfer fails here,
+not three steps later as a confusing conductor auth error).
+End-to-end proof (the conductor user authenticates to the mgmt cluster via the FIP):
+```bash
+juju ssh -m openstack magnum/0 \
+  'sudo -u magnum env HOME=/tmp helm --kubeconfig /etc/magnum/kubeconfig list -A' </dev/null   # runs as the conductor user; HOME=/tmp presumably gives helm a writable home -- confirm
+```
+Expect: the mgmt-cluster helm releases listed (cert-manager, ck-dns, ck-network
+cilium, cluster-api-addon-provider, cluster-api-janitor-openstack, metrics-server).
+GATE: a populated list = reach + auth OK. (Hardening, Roosevelt: replace this
+cluster-admin kubeconfig with a scoped ServiceAccount kubeconfig.)
+
+## Step 7.3 -- Confirm the driver target + served CAPI versions (D-042)
+`# RUN: jumphost` + jumphost kubectl. The fix is the RELEASED tag
+`magnum-capi-helm==1.4.0` (the "generalize-api-resources" feature). 1.3.0 read the
+version-less v1beta2 `infrastructureRef` and failed the health GET; 1.4.0 resolves each
+resource query as `api_resources.get(<Kind>,{}).get("api_version", <code-default>)`,
+where the driver's CODE defaults are v1beta1 for every CAPI core kind (Cluster /
+MachineDeployment / Machine -> cluster.x-k8s.io/v1beta1; OpenstackCluster ->
+infrastructure.cluster.x-k8s.io/v1beta1; K8sControlPlane ->
+controlplane.cluster.x-k8s.io/v1beta1). IMPORTANT: the `api_resources` OPTION itself
+defaults to an EMPTY map `{}` -- the v1beta1 values are code-level fallbacks, NOT option
+defaults. This cluster serves v1beta1 (CAPI v1.13 still serves it; unserved only in
+v1.16), so an empty `api_resources` yields v1beta1 lookups that match -- no per-kind
+override needed.
+
+Sanity-confirm v1beta1 is served per group before installing:
+```bash
+( {
+  export KUBECONFIG="$HOME/capi-mgmt.kubeconfig"   # scoped to this subshell only
+  for g in cluster.x-k8s.io controlplane.cluster.x-k8s.io infrastructure.cluster.x-k8s.io \
+           bootstrap.cluster.x-k8s.io addons.cluster.x-k8s.io; do
+    echo "== $g =="; kubectl api-resources --api-group="$g" 2>/dev/null | awk 'NR==1 || /v1beta1/'   # header row plus rows mentioning v1beta1; kubectl errors are hidden
+  done
+} )
+#   Expect v1beta1 for: cluster.x-k8s.io (Cluster/MachineDeployment/Machine),
+#   controlplane.cluster.x-k8s.io (KubeadmControlPlane), infrastructure.cluster.x-k8s.io
+#   (OpenStackCluster -- verified anchor). If a CORE kind serves ONLY v1beta2, override
+#   just that kind via api_resources in Step 7.6; otherwise the defaults work as-is.
+```
+
+## Step 7.4 -- Install the driver (1.4.0) + helm in the conductor container
+`# RUN: jumphost -> magnum/0`  `--no-deps` preserves the deb-managed oslo stack (no
+PEP668 issue on the 22.04 container).
+
+```bash
+# egress pre-check
+juju ssh -m openstack magnum/0 \
+  'curl -s -o /dev/null -w "pypi:%{http_code}\n" https://pypi.org/simple/ ; \
+   curl -s -o /dev/null -w "helm:%{http_code}\n" https://get.helm.sh/' </dev/null
+
+# helm v3.17.3 (if not already present from a prior graft)
+juju ssh -m openstack magnum/0 'command -v helm && helm version --short || echo "helm absent -- install v3.17.3 from get.helm.sh tarball to /usr/local/bin/helm"' </dev/null
+
+# install the RELEASED contract-coherent driver (supersedes 1.3.0)
+juju ssh -m openstack magnum/0 'sudo python3 -m pip install --no-deps --upgrade "magnum-capi-helm==1.4.0"' </dev/null
+
+# verify the install + entry point
+juju ssh -m openstack magnum/0 \
+  'pip show magnum-capi-helm | egrep "Version|Location"; \
+   python3 -c "import importlib.metadata as m; print([e.name for e in m.entry_points(group=\"magnum.drivers\")])"' </dev/null
+```
+Expect: Version 1.4.0; `k8s_capi_helm_v1` present in the entry points.
+
+## Step 7.5 -- api_resources (D-042; set EXPLICITLY to an empty map on this cluster)
+1.4.0 exposes ONE [capi_helm] option for this -- `api_resources`, a JSON string mapping
+CAPI kinds (Cluster, OpenstackCluster, MachineDeployment, K8sControlPlane, Machine,
+Manifests, HelmRelease) to `{api_version, plural_name}`. The driver's CODE falls back to
+v1beta1 for every CAPI core kind when that kind is absent from the map (Step 7.3), and
+this cluster serves v1beta1 -- so the map's CONTENTS are empty here. But set it
+EXPLICITLY to `{}` in the drop-in (Step 7.6) rather than omit it: the option's registered
+default is a Python dict `{}` and the driver runs `json.loads()` on the value, so an
+explicit string `{}` avoids depending on how oslo coerces a non-string default (not
+empirically testable in the build environment -- explicit-set is the safe choice).
+Override a specific kind ONLY if Step 7.3 showed it serves ONLY v1beta2, e.g.
+`api_resources = {"Cluster": {"api_version": "cluster.x-k8s.io/v1beta2"}}`.
+
+## Step 7.6 -- Stage the [capi_helm] conf.d drop-in (D-037)
+`# RUN: jumphost -> magnum/0`  0644 root, NO secrets (it points at the 0600
+kubeconfig). The `default_helm_chart_version = 0.25.1` line is LOAD-BEARING (driver
+built-in default is `0.10.1`, the retired v1alpha6-era chart). `api_resources` is set to
+an explicit empty map `{}` (Step 7.5 -- the driver's code falls back to v1beta1 for every
+CAPI kind, which this cluster serves; explicit `{}` avoids the dict-default `json.loads`
+question). ASCII only.
+
+```bash
+juju ssh -m openstack magnum/0 "sudo tee /etc/magnum/magnum.conf.d/00-capi-helm.conf >/dev/null <<'CONF'
+[capi_helm]
+kubeconfig_file = /etc/magnum/kubeconfig
+helm_chart_repo = https://azimuth-cloud.github.io/capi-helm-charts
+helm_chart_name = openstack-cluster
+default_helm_chart_version = 0.25.1
+api_resources = {}
+CONF" </dev/null
+```
+If (and only if) Step 7.3 showed a core kind is v1beta2-only, REPLACE the staged
+`api_resources = {}` line with the override (do not add a second `api_resources` key) -- ONE line, a JSON value naming just the kinds that need it:
+```
+    # api_resources = {"Cluster": {"api_version": "cluster.x-k8s.io/v1beta2"}, ...}
+```
+Re-check ASCII cleanliness:
+```bash
+juju ssh -m openstack magnum/0 \
+  'LC_ALL=C grep -nP "[^\x00-\x7F]" /etc/magnum/magnum.conf.d/00-capi-helm.conf && echo NON-ASCII || echo "ASCII clean"' </dev/null
+```
+
+## Step 7.7 -- Wire config-dir injection via /etc/default (D-037 REVISED; NOT a systemd drop-in)
+`# RUN: jumphost -> magnum/0`  These OpenStack debs run the daemon through an LSB
+init script wrapped by systemd `systemd-start`; a systemd `ExecStart` drop-in is
+INERT (appendix-A: D-037, L-P6-1/L-P6-2). The sanctioned extension point is
+`/etc/default/magnum-conductor`, sourced inside the init script AFTER the base
+`--config-file` is assembled. The charm does not manage that file.
+
+```bash
+# confirm the daemon currently has NO --config-dir (the problem we are fixing)
+juju ssh -m openstack magnum/0 'ps -ww -C magnum-conductor -o args=' </dev/null
+
+# create the per-service extension (literal $DAEMON_ARGS -- it expands at source time)
+juju ssh -m openstack magnum/0 \
+  "echo 'DAEMON_ARGS=\"\$DAEMON_ARGS --config-dir /etc/magnum/magnum.conf.d\"' \
+   | sudo tee /etc/default/magnum-conductor >/dev/null && \
+   sudo chmod 0644 /etc/default/magnum-conductor" </dev/null
+
+# DRY-RUN verify WITHOUT restarting: the init script's own show-args echoes the assembled cmdline
+juju ssh -m openstack magnum/0 '/etc/init.d/magnum-conductor show-args' </dev/null
+```
+GATE: `show-args` must show BOTH `--config-file=/etc/magnum/magnum.conf` AND
+`--config-dir /etc/magnum/magnum.conf.d`. Do not restart until this passes.
+RESIDUAL (logged): if a future charm hook ever writes /etc/default/magnum-conductor,
+the append is lost and [capi_helm] silently stops being read -- detect via show-args/ps.
+
+## Step 7.8 -- Restart conductor + verify driver + HEALTHY (P6e + D-042 Stage 6)
+`# RUN: jumphost -> magnum/0`, then jumphost health poll.
+
+```bash
+juju ssh -m openstack magnum/0 \
+  'sudo systemctl restart magnum-conductor && sleep 3 && systemctl is-active magnum-conductor && \
+   ps -ww -C magnum-conductor -o args=' </dev/null
+# expect: active; live cmdline carries --config-dir.
+
+juju ssh -m openstack magnum/0 'sudo magnum-driver-manage list-drivers 2>/dev/null | grep capi || \
+   { echo "driver list (full):"; sudo magnum-driver-manage list-drivers; }' </dev/null
+# expect: k8s_capi_helm_v1 listed.
+```
+Health poll (the D-042 fix target -- this is what 1.3.0 reported UNHEALTHY):
+
+FRESH DEPLOY ROUTING: on a clean redeploy NO cluster exists yet, so there is nothing
+to poll -- SKIP this poll; the gate is discharged in phase-08 step 8.2
+(`capi-test-1` reaching `health_status = HEALTHY`). The poll below applies when
+grafting onto a cloud that already has a CAPI-driver cluster: substitute that
+cluster's name and the current `ENV(project)` id (both are run-specific).
+```bash
+( {
+  source ~/admin-openrc
+  unset OS_PROJECT_NAME OS_PROJECT_ID OS_TENANT_NAME OS_TENANT_ID
+  export OS_PROJECT_ID=674171fd28d446d3a37073b6a761e910       # ENV(project)
+  for i in $(seq 1 10); do
+    echo "[$i] health=$(openstack coe cluster show capi-test-1 -f value -c health_status 2>/dev/null)"
+    echo "    reason=$(openstack coe cluster show capi-test-1 -f value -c health_status_reason 2>/dev/null)"
+    sleep 20
+  done
+} )
+```
+GATE (existing-cluster graft only): `health_status -> HEALTHY`, with the
+`infrastructure` sub-check now `Ready` (it was the only failing axis under 1.3.0).
+On a FRESH DEPLOY this gate is deferred to phase-08 step 8.2 -- do not block here.
+If it does not clear on an existing-cluster graft, go to Rollback.
+
+## Step 7.9 -- Regression check (confirm create/manage path intact)
+`# RUN: jumphost` (capi-mgmt scope). Prove the upgraded driver still creates+deletes.
+
+FRESH DEPLOY ROUTING: SKIP this step -- the `capi-k8s-v1-32` template does not exist
+yet (phase-08 step 8.0 creates it), and phase-08 itself (create `capi-test-1` to
+CREATE_COMPLETE, full acceptance, then 8.5 delete) is a superset of this check. Run
+7.9 as written only when grafting onto an existing cloud where the template is present.
+```bash
+openstack coe cluster create capi-fix-check --cluster-template capi-k8s-v1-32 \
+  --keypair capi-mgmt-key --master-count 1 --node-count 1
+# watch to CREATE_COMPLETE, then:
+openstack coe cluster delete capi-fix-check    # watch to gone
+```
+
+## Rollback (TEMPORARY holding state only -- if 7.8 health does not clear or 7.9 regresses)
+`# RUN: jumphost -> magnum/0`  Reverts to the as-first-built functional
+(cosmetic-UNHEALTHY) state on 1.3.0 -- a TEMPORARY holding state to keep the conductor
+serving while the 1.4.0 issue is diagnosed, NOT a v1 end state. v1 is NOT complete until
+`magnum-capi-helm==1.4.0` is installed and `health_status = HEALTHY` (D-011). Re-attempt
+7.3-7.9 after diagnosis.
+```bash
+juju ssh -m openstack magnum/0 'sudo python3 -m pip install --no-deps --force-reinstall "magnum-capi-helm==1.3.0"' </dev/null
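+# restore the config backup if you snapshotted one (this phase writes 00-capi-helm.conf and /etc/default/magnum-conductor; no step above snapshots them), then: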
+juju ssh -m openstack magnum/0 'sudo systemctl restart magnum-conductor' </dev/null
+```
+
+---
+
+## EXIT GATE (phase-07 complete)
+- Conductor reaches the mgmt apiserver via the FIP (TCP-OK); kubeconfig 0600/magnum; helm list OK.
+- magnum-capi-helm 1.4.0 installed (contract-coherent, RELEASED); `k8s_capi_helm_v1` enumerated.
+- [capi_helm] drop-in read by the conductor (`--config-dir` present in the live cmdline).
+- `health_status = HEALTHY` (infrastructure Ready) on a CAPI-driver cluster -- D-042
+  issue eliminated. FRESH DEPLOY: no cluster exists yet; this item is DEFERRED to
+  phase-08 step 8.2 (existing-cluster graft: verify here on that cluster).
+- Regression create/delete passed (FRESH DEPLOY: deferred -- phase-08 8.1-8.5 is the
+  superset proof).
+- Proceed to phase-08 (workload-cluster acceptance + D-011).
+
+## As-built reference (2026-06-08/09 graft -- audit trail)
+- magnum/0: LXD 1/lxd/2 on openstack1, addr 10.12.4.76, charm magnum 2024.1/stable rev 70,
+  DEB magnum 18.0.1, python3.10, container ubuntu 22.04; conductor user `magnum`.
+- As-FIRST-built driver: 1.3.0 (pip --no-deps) -> read the version-less v1beta2 ref -> health UNHEALTHY (D-042).
+  PHASE-07 BASELINE supersedes this with the RELEASED magnum-capi-helm==1.4.0 (api_resources; default v1beta1).
+- kubeconfig: /etc/magnum/kubeconfig, -rw------- magnum, ~5657 bytes, server = FIP 10.12.7.40:6443.
+- conf.d drop-in /etc/magnum/magnum.conf.d/00-capi-helm.conf: kubeconfig_file, helm_chart_repo
+  (azimuth), helm_chart_name openstack-cluster, default_helm_chart_version 0.25.1 (api_resources
+  left default on the as-built run -- v1beta1 served by CAPI v1.13.2 / CAPO v0.14.4; the Step 7.6 baseline sets it explicitly to `{}`).
+- config-dir injection: /etc/default/magnum-conductor `DAEMON_ARGS="$DAEMON_ARGS --config-dir
+  /etc/magnum/magnum.conf.d"`; verified live via `ps` and the init script `show-args`.
+- helm v3.17.3 at /usr/local/bin/helm.
+- Driver internals (reference, from installed source): routes on (server_type vm, os ubuntu,
+  coe kubernetes); k8s version comes from the IMAGE `kube_version` property (NOT a template label),
+  os_distro=ubuntu; flavor floor 2048 MB / 2 vCPU; auto-mints an app credential (workload nodes use
+  the PUBLIC keystone interface); apiServer ALWAYS provisions an Octavia LB (+FIP default).
+
+## Next
+phase-08 -- workload-cluster acceptance: create a tenant cluster from template
+`capi-k8s-v1-32`, confirm CREATE_COMPLETE + Ready nodes + Calico + LB, and run the
+D-011 (amended per D-019) acceptance criteria.
diff --git a/runbooks/phase-08-workload-cluster-acceptance.md b/runbooks/phase-08-workload-cluster-acceptance.md
new file mode 100644
index 0000000..92d21d8
--- /dev/null
+++ b/runbooks/phase-08-workload-cluster-acceptance.md
@@ -0,0 +1,255 @@
+# Phase 08 -- Workload-Cluster Acceptance (D-011)
+
+Prove tenant self-service Kubernetes end to end: create a workload cluster from
+the `capi-k8s-v1-32` template, confirm it converges (Ready nodes, CNI, CCM/CSI,
+API LB), then run the D-011 acceptance bar. Passing D-011 is the gate that unlocks
+the project-completion tasks.
+
+Decisions: D-011 (acceptance bar; amended by D-019 -- item 8 Designate deferred),
+D-031/D-036 (driver/engine/chart coherence), D-039 (app-cred roles incl.
+load-balancer_member), D-040 (reserved-host-memory), D-041 (non-HA mgmt manual
+start), D-042 (driver contract coherence -> health HEALTHY after phase-07).
+Troubleshooting: appendix-A -- stuck-delete finalizer, LB-failover, OOM/manual-start,
+uninitialized-taint, CNI-label, DOCFIX-021.
+
+---
+
+## Prerequisites (must be true entering phase-08)
+- phase-04 done: the external provider network (`provider-ext`) exists. The workload
+  cluster's API-LB floating IP and node FIPs are allocated from it.
+- phase-05 done: Octavia enabled and healthy. The magnum-capi-helm driver ALWAYS
+  provisions an Octavia LB for the apiserver (`--master-lb-enabled`), so Octavia is a
+  hard prerequisite for workload-cluster create (not optional).
+- phase-07 EXIT GATE passed: conductor grafted, contract-coherent driver (1.4.0). On a
+  FRESH DEPLOY the HEALTHY + regression items of that gate are deferred to THIS phase
+  (8.2 health gate; 8.1-8.5 create path). On an existing-cluster graft, `health_status`
+  already reports HEALTHY (if the phase-07 1.4.0 upgrade was skipped, expect the COSMETIC
+  UNHEALTHY of D-042 -- functional, but not an acceptance pass).
+- Image `ubuntu-jammy-kube-v1.32.13` present AND carrying Glance properties
+  `kube_version` (e.g. v1.32.13) and `os_distro=ubuntu`. The driver reads the k8s
+  version from the IMAGE, not a template label (P6-CONTRACT / L-P6-3); a missing
+  property fails create.
+- Cluster template `capi-k8s-v1-32` present (8.0 verifies/creates it).
+- D-039: the Magnum service path mints app-creds carrying `load-balancer_member`
+  (+ member, reader). A frozen pre-D-039 app-cred 403s on the Octavia LB step and
+  wedges create/delete (appendix-A: stuck-delete).
+- D-040: `nova-compute reserved-host-memory = 8192` in effect on all compute hosts
+  (baked into the hardened bundle; verify below). Default 512 over-commits the
+  hyperconverged hosts and OOM-kills guests.
+
+## Constants and env-literals (TAG: confirm per site / run on rebuild)
+- `ENV(project)`       capi-mgmt    (id 674171fd28d446d3a37073b6a761e910)
+- `ENV(admin-project)` admin        (id 65ce73e6798e4d1e8dd066609b7033ef)
+- `ENV(template)`      capi-k8s-v1-32   (uuid e2549d8b-4b89-4947-8b9a-0f4fdbe87d59)
+- `ENV(image)`         ubuntu-jammy-kube-v1.32.13 (id de69c243-bd1f-4182-8e9e-33933e926857)
+- `ENV(ext-net)`       provider-ext (id 70b34bb2-3afb-4b43-96d3-f520dbcbf9a8)
+- `ENV(keypair)`       capi-mgmt-key
+- `ENV(cluster)`       capi-test-1
+- `ENV(workload-cidr)` 10.20.16.0/24
+- `ENV(flavors)`       master gp.mid (8192/2) ; worker capi.node (4096/2)
+- run-specific (do NOT hardcode -- capture at run): API LB id, LB VIP (10.20.16.x),
+  workload API FIP (10.12.7.180 on the as-built run).
+
+## Scope-hygiene preambles (the project-scope leak guard)
+Capi-mgmt-scoped (cluster CRUD, show, config):
+```bash
+source ~/admin-openrc
+unset OS_PROJECT_NAME OS_PROJECT_ID OS_TENANT_NAME OS_TENANT_ID OS_PROJECT_DOMAIN_ID OS_PROJECT_DOMAIN_NAME
+export OS_PROJECT_ID=674171fd28d446d3a37073b6a761e910      # ENV(project)
+```
+Admin-scoped (LB amphora/failover -- these 403 under tenant member scope):
+```bash
+source ~/admin-openrc
+unset OS_PROJECT_ID OS_TENANT_ID OS_TENANT_NAME            # token -> admin 65ce73e6...
+```
+
+---
+
+## Step 8.0 -- Verify prerequisites; create the template if absent
+`# RUN: jumphost` (capi-mgmt scope). Read-only checks consolidated; template create
+gated separately. (NOTE: template + image are tenant-setup artifacts; on a fully
+fresh build they may be produced by the magnum-setup step -- this phase
+verifies/creates the template for self-containment.)
+
+```bash
+( {
+  set -u
+  echo "=== image present + carries kube_version / os_distro ==="
+  openstack image show ubuntu-jammy-kube-v1.32.13 -f json \
+    | python3 -c 'import json,sys;d=json.load(sys.stdin);p=d.get("properties",d);print("kube_version=",d.get("kube_version") or p.get("kube_version"));print("os_distro=",d.get("os_distro") or p.get("os_distro"))'
+  echo "=== reserved-host-memory (D-040) on a compute unit ==="
+  juju ssh nova-compute/0 'sudo grep -i reserved_host_memory /etc/nova/nova.conf' </dev/null   # expect 8192
+  echo "=== template present? ==="
+  openstack coe cluster template show capi-k8s-v1-32 -f value -c uuid 2>/dev/null \
+    && echo "template OK" || echo "template ABSENT -- create it below"
+} )
+```
+Create the template only if absent (spec from the as-built capture; the two labels
+are intentionally the whole config -- chart 0.25.1 + the conf.d drop-in govern the
+rest). `--network-driver` is OMITTED deliberately: under the 1.4.0 driver the option
+IS honored (it maps to the chart `network_driver`), so to keep the as-built chart
+default (Calico) we leave it unset. Setting `flannel` here would now switch the CNI --
+do that only if Calico is being intentionally replaced (appendix-A: CNI-label / 1.4.0).
+```bash
+openstack coe cluster template create capi-k8s-v1-32 \
+  --coe kubernetes --server-type vm \
+  --image ubuntu-jammy-kube-v1.32.13 \
+  --external-network provider-ext \
+  --master-flavor gp.mid --flavor capi.node \
+  --master-lb-enabled --floating-ip-enabled \
+  --dns-nameserver 8.8.8.8 \
+  --docker-storage-driver overlay2 \
+  --labels fixed_subnet_cidr=10.20.16.0/24,octavia_provider=amphora
+```
+
+## Step 8.1 -- Create the workload cluster (MUTATION)
+`# RUN: jumphost` (capi-mgmt scope). 1 control-plane + 2 workers, matching the
+as-built capi-test-1. The driver auto-mints the app-cred (D-039) and always
+provisions an Octavia LB (+FIP) for the API.
+
+```bash
+openstack coe cluster create capi-test-1 \
+  --cluster-template capi-k8s-v1-32 \
+  --keypair capi-mgmt-key \
+  --master-count 1 --node-count 2
+openstack coe cluster show capi-test-1 -f value -c uuid -c status
+```
+
+## Step 8.2 -- Watch to CREATE_COMPLETE; capture the LB/FIP
+`# RUN: jumphost` (capi-mgmt scope). Poll; capture run-specific LB id + FIP.
+```bash
+( {
+  for i in $(seq 1 40); do
+    S=$(openstack coe cluster show capi-test-1 -f value -c status 2>/dev/null)
+    echo "[$i] status=$S"
+    case "$S" in CREATE_COMPLETE|CREATE_FAILED) break;; esac
+    sleep 30
+  done
+  echo "=== api endpoint + node counts ==="
+  openstack coe cluster show capi-test-1 -f value -c api_address -c master_count -c node_count -c health_status
+} )
+```
+GATE: `status = CREATE_COMPLETE`. Record `api_address` (the FIP endpoint, e.g.
+https://10.12.7.180:6443) for 8.3. If `CREATE_FAILED`, see appendix-A (stuck-delete
+/ app-cred 403 / OOM). With phase-07's driver, `health_status` must read HEALTHY -- on a FRESH DEPLOY this is the health gate phase-07 step 7.8 defers to this step.
+
+## Step 8.3 -- Retrieve the workload kubeconfig; verify nodes / CNI / addons
+`# RUN: jumphost`. Pull the cluster's kubeconfig via Magnum, then inspect.
+```bash
+# capi-mgmt scope
+openstack coe cluster config capi-test-1 --dir ~/capi-test-1 --force
+export KUBECONFIG=~/capi-test-1/config
+# LIVE-REVIEW: confirm `coe cluster config` returns a usable kubeconfig under the
+#   capi-helm driver; alternative is the CAPI kubeconfig secret on the mgmt cluster:
+#   KUBECONFIG=~/capi-mgmt.kubeconfig clusterctl -n <magnum-ns> get kubeconfig <cluster-name-suffix>
+
+( {
+  export KUBECONFIG=~/capi-test-1/config
+  echo "=== nodes (expect 3 Ready, v1.32.13: 1 control-plane + 2 workers) ==="
+  kubectl get nodes -o wide
+  echo "=== CNI = Calico (chart default; --network-driver omitted) ==="
+  kubectl -n kube-system get pods | grep -Ei 'calico|tigera' || kubectl get pods -A | grep -Ei 'calico|tigera'
+  echo "=== CCM (OpenStack cloud-controller-manager) + Cinder CSI + CoreDNS Running ==="
+  kubectl get pods -A | grep -Ei 'cloud-controller|openstack-cloud|cinder-csi|coredns'
+  echo "=== any not-Running pods? (expect none) ==="
+  kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded
+} )
+```
+GATE: 3 nodes Ready; Calico pods Running; CCM Running (NOT crash-looping -- this is
+D-011 item 5); Cinder CSI + CoreDNS Running; no stuck pods.
+
+================================================================================
+## Step 8.4 -- D-011 acceptance bar (the gate)
+================================================================================
+Run each; record pass/fail. Wording adapted to the as-built IP-only endpoints (B5)
+where the original D-011 said "hostname."
+
+- **D-011.1 -- All charms active/idle.** `# RUN: jumphost`
+  `juju status --format=short | grep -E '^ *- ' | grep -v 'agent:idle, workload:active' || echo "all active/idle"`
+  Pass: nothing but active/idle (phase-03 re-confirmed here).
+
+- **D-011.2 -- API reachability from the jumphost (all public VIPs).** `# RUN: jumphost`
+  IP-only: hit each service VIP, e.g. Keystone:
+  `curl -sk https://10.12.4.50:5000/v3 -o /dev/null -w '%{http_code}\n'` (expect 200/300).
+  Repeat per public VIP (.50-.60 block). Pass: all respond.
+
+- **D-011.3 -- API reachability from a tenant VM (Option B).** `# RUN: jumphost -> mgmt VM`
+  The generalized phase-06 GATE 1: a tenant VM reaches the provider VIP.
+  `ssh ... ubuntu@10.12.7.40 "timeout 6 bash -c 'exec 3<>/dev/tcp/10.12.4.50/5000' && echo VIP-OK || echo VIP-FAIL" </dev/null`
+  Pass: VIP-OK (proves the shared-L2 Option B path).
+
+- **D-011.4 -- Octavia LB pattern re-passes (round-robin, failover, recovery).**
+  Round-robin: 2-member pool behind a VIP, repeated curls hit both members.
+  Recovery (admin scope): `openstack loadbalancer failover <api-lb-id>` -> watch
+  ERROR/PENDING_UPDATE -> ACTIVE (~100s; single STANDALONE amphora -> brief blip;
+  operating_status holds ONLINE). (appendix-A: LB-failover; amphora ops are
+  admin-scope only.) Pass: round-robin distributes; failover returns to ACTIVE.
+  TODO (before sign-off): this runbook does NOT yet contain the build steps for the
+  standalone 2-member round-robin pool (LB + listener + pool + 2 backend members +
+  health monitor). Add them here, or fold the round-robin check into the
+  workload-cluster API LB the driver already builds, before D-011.4 is marked complete.
+
+- **D-011.5 -- End-to-end Magnum CAPI cluster create, CCM not crash-looping.**
+  Satisfied by 8.1-8.3 (CREATE_COMPLETE + CCM Running). Pass = that gate.
+
+- **D-011.6 -- Vault unseal (MANUAL is the v1 standard).** `# RUN: jumphost`
+  Confirm vault `Sealed=false` now. The v1 standard is MANUAL unseal after a unit
+  reboot (3-of-5 key shares entered at the hidden prompt -- see phase-02); auto-unseal
+  is an available option, adopted case-by-case (NOT configured in v1). This is a
+  re-confirmation at acceptance, not a re-init. Pass: vault unsealed, and the operator
+  can re-unseal manually after a reboot.
+
+- **D-011.7 -- KVM snapshot baseline taken.** `# RUN: jumphost hypervisor`
+  Per D-012: Snapshot 1 (post-deploy, post-validation, pre-tenant-resources) and
+  Snapshot 2 (post-tenant-setup). qcow2-level, per-VM, on the jumphost hypervisor.
+  Pass: Snapshot 1 captured (Snapshot 2 after tenant setup).
+
+- **D-011.8 -- Designate zones + tenant hostname resolution.** DEFERRED.
+  D-019 deferred Designate (dropped do-doc-10-dns). Also moot under IP-only B5:
+  there are no API hostnames to resolve; tenants use IPs/VIPs. Re-scope when DNS
+  returns (v2). NOT required for v1 acceptance.
+
+## Step 8.5 -- (Optional) Clean delete verification
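+`# RUN: jumphost` (capi-mgmt scope). Confirms the manage/teardown path. Optional on an existing-cluster graft; on a FRESH DEPLOY this delete is part of the create/delete regression proof that phase-07 step 7.9 defers to phase-08.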
+```bash
+openstack coe cluster delete capi-test-1     # watch coe cluster list to gone
+```
+If a delete WEDGES (DELETE_IN_PROGRESS, CRs stuck Deleting on an Octavia 403 from a
+frozen app-cred): clear the OpenStackCluster finalizer (the Cluster auto-follows),
+then manual neutron cleanup in dependency order -- appendix-A: stuck-delete.
+```bash
+# NS=magnum-674171fd28d446d3a37073b6a761e910
+# KUBECONFIG=~/capi-mgmt.kubeconfig kubectl -n $NS patch openstackcluster <cluster>-<suffix> \
+#   --type=merge -p '{"metadata":{"finalizers":[]}}'
+# then: openstack router remove subnet / router unset external-gateway / router delete /
+#       subnet delete / network delete / security group delete  (dependency order)
+```
+
+---
+
+## EXIT GATE (phase-08 / v1 acceptance)
+- 8.1-8.3 passed: capi-test-1 CREATE_COMPLETE, 3 Ready nodes, Calico, CCM/CSI/CoreDNS, API LB ACTIVE/ONLINE.
+- D-011 items 1-7 PASS; item 8 deferred (D-019).
+- health_status HEALTHY (phase-07 driver).
+- => v1 deployment is ACCEPTED. Project-completion tasks unlocked:
+  consolidate the do-doc runbooks into docs/v1-deploy-runbook.md; revert the
+  GitBucket repo OpenStack/openstack-caracal-ipv4 to PRIVATE.
+
+## As-built reference (capi-test-1, suffix kgwwe7c4qj6a, 2026-06-09)
+- create: `--master-count 1 --node-count 2`; uuid 6de15cf4-8805-4ac2-b413-8de2c48d92cf.
+- nodes: control-plane (xsc62) + 2 workers; v1.32.13; Calico CNI.
+- API LB id 0f968008-8429-4ac3-8b82-452e126982cf, VIP 10.20.16.144, FIP 10.12.7.180,
+  endpoint https://10.12.7.180:6443; single STANDALONE amphora.
+- CCM / Cinder CSI / CoreDNS Running; all addons scheduled; CREATE_COMPLETE.
+- Incident on the as-built run (recovery patterns -> appendix-A): host OOM SHUTOFF the
+  mgmt VM (D-041 manual `openstack server start capi-mgmt-v2`); API LB went
+  provisioning_status ERROR -> admin-scope `loadbalancer failover` (ACTIVE ~100s);
+  workers held the CAPI uninitialized taint until the mgmt API returned, then addons
+  scheduled. Root remediation: D-040 reserved-host-memory 512 -> 8192.
+- health_status was UNHEALTHY on the as-built run (cosmetic, D-042) -- phase-07's
+  contract-coherent driver clears it to HEALTHY.
+
+## Next
+v1 acceptance passes here. Proceed to the project-completion workstream: runbook
+consolidation (this phase set -> docs/v1-deploy-runbook.md), appendix-A authoring,
+the repo change-list, and reverting repo visibility to private.
diff --git a/runbooks/v1-do-doc-01-prep.md b/runbooks/v1-do-doc-01-prep.md
deleted file mode 100644
index 4ec3901..0000000
--- a/runbooks/v1-do-doc-01-prep.md
+++ /dev/null
@@ -1,349 +0,0 @@
-# v1 Do-Document 01 — Pre-Deploy State Check
-
-**Status:** First execution document of Batch A. Pure read-only verification. No cloud state is mutated.
-
-**Position in sequence:** Runs after `docs/v1-pre-deploy-fixes.md` (the repo-hygiene fixes) have been committed and pushed. Runs before `v1-do-doc-02-pki.md`.
-
-**Replaces:** `runbooks/deprecated/00-pre-deploy.md` (which contained references to D-013 graceful CAPI teardown, capi-mgmt preservation, and `netbox/vlans-import.py` — all superseded or removed).
-
-**Cross-references:**
-
-- D-014 (repo path) — verifies clone matches
-- D-015 (v1/v2 fork) — v1 scope confirmed
-- D-017 (CAPI bootstrap cluster lifecycle) — every cycle is a full rebuild; no preservation
-- D-018 (teardown strategy) — MAAS-release-direct; pre-existing teardown verified
-
----
-
-
-## 1. Purpose & scope
-
-This document confirms the jumphost, repo, openrc files, Juju controller, and MAAS state are all in the expected pre-deploy posture before any cloud-touching execution begins.
-
-**What this document does:**
-
-- Verifies jumphost identity and shell context
-- Verifies repo cloned at `$HOME/openstack-caracal-ipv4` with credential helper configured
-- Verifies the repo is on `main` and pulled up to date (so the pre-deploy fixes from `docs/v1-pre-deploy-fixes.md` are local)
-- Verifies admin and user1 openrc files exist and source cleanly
-- Verifies the Juju controller for the `openstack` model is reachable
-- Verifies all 5 cloud-target VMs are MAAS-Ready
-- Explicitly acknowledges what is NOT in scope for this prep phase
-
-**What this document does NOT do:**
-
-- Run NetBox imports (pinned for external NetBox-engineer review; not part of v1 deploy flow per `netbox/README.md`)
-- Take KVM snapshots (per D-017, every cycle is a full rebuild; no rollback target needed; pre-existing snapshots remain as a safety net but are not refreshed here)
-- Back up Vault unseal keys (the prior-cloud Vault keys are accepted lost per the Caracal_Rebuild handoff; the Caracal deploy will reinit Vault from scratch)
-- Graceful CAPI workload teardown (superseded by D-018)
-- Backup of `juju export-bundle` output (the canonical bundle is `bundle.yaml` in the repo; export is for diagnostics only and runs only if cloud is currently up)
-
-**Out of scope:**
-
-- Bundle review (done; tracked in `docs/v1-pre-deploy-fixes.md`)
-- PKI generation (next document, `v1-do-doc-02-pki.md`)
-- Cloud destroy (`runbooks/01-destroy-model.md`; pre-existing teardown likely already done; verify-only path used by v1-do-doc-03)
-
----
-
-
-## 2. Decisions captured
-
-| Decision | Choice | Roosevelt parallel |
-|---|---|---|
-| Snapshot strategy for this rebuild | None taken here; existing KVM snapshots remain as safety net but per D-017 every cycle is a full rebuild | N/A — Roosevelt is bare-metal; equivalent is MAAS re-deploy of all hosts |
-| Vault key backup | Skipped; prior keys accepted lost | Vault will be re-initialized from scratch each Roosevelt rebuild too |
-| NetBox import sequence | Out of scope; pinned external | Roosevelt's NetBox state will be set up before the bundle pulls values |
-| State capture | Skipped for this Caracal cycle (cloud is already down) | On Roosevelt, capture `juju export-bundle` and `juju status` BEFORE the next teardown cycle |
-
----
-
-
-## 3. Prerequisites
-
-| Prereq | Verification command |
-|---|---|
-| You are SSH'd into jumphost `vopenstack-jesse` as `jessea123` | `hostname && id -un` |
-| `$HOME` is writable | `test -w "$HOME" && echo OK` |
-| `git`, `juju`, `maas`, `openssl`, and `python3` available on PATH | `for t in git juju maas openssl python3; do command -v $t || echo "MISSING: $t"; done` |
-| The openstack snap is installed (used in later runbooks; not strictly needed here) | `snap list openstackclients 2>/dev/null || echo "(not installed; v1-do-doc-04 will check)"` |
-
-If any prereq fails, stop and resolve before continuing.
-
----
-
-
-## 4. Step-by-step state check
-
-Each section is a copy-paste block. The blocks are read-only. No `exit` is used in interactive context; failures are reported as text and operator decides whether to proceed.
-
-### 4.1 Jumphost identity
-
-```bash
-echo "=== Jumphost identity ==="
-hostname
-id -un
-date -Iseconds
-echo "=== Disk space ==="
-df -h "$HOME" /var/lib/libvirt/images 2>/dev/null
-```
-
-**Expected:**
-
-- `hostname` → `vopenstack-jesse` (or your configured equivalent)
-- `id -un` → `jessea123`
-- `$HOME` free space: at least 5 GB (octavia PKI, deploy logs, kubeconfigs all live here)
-- `/var/lib/libvirt/images` free space: enough for the existing OSD qcow2 files (already trimmed to ~200 KiB each per teardown)
-
-### 4.2 Repository clone present at agreed path
-
-```bash
-export REPO="$HOME/openstack-caracal-ipv4"
-
-if [ ! -d "$REPO/.git" ]; then
-  echo "MISSING: $REPO does not contain a git checkout."
-  echo "Run the agreed clone procedure (HTTPS clone with 72h credential cache) first:"
-  echo "  cd \$HOME && git clone https://git.baldurkeep.com/git/OpenStack/openstack-caracal-ipv4.git"
-  echo "  cd $REPO && git config credential.helper \"cache --timeout=259200\""
-  echo ""
-  echo "Stop here. Re-run this section after the clone."
-else
-  echo "[OK] Repo present at: $REPO"
-  cd "$REPO"
-  echo ""
-  echo "=== Current branch ==="
-  git branch --show-current
-  echo ""
-  echo "=== Remote URL ==="
-  git remote get-url origin
-  echo ""
-  echo "=== Credential helper ==="
-  git config --get credential.helper || echo "(no credential helper configured)"
-  echo ""
-  echo "=== Working tree status ==="
-  git status --short
-  echo ""
-  echo "=== Latest commit ==="
-  git log --oneline -1
-fi
-```
-
-**Expected:**
-
-- Branch: `main`
-- Remote URL: `https://git.baldurkeep.com/git/OpenStack/openstack-caracal-ipv4.git`
-- Credential helper: `cache --timeout=259200` (or however configured)
-- Working tree status: empty (clean)
-- Latest commit: should reflect the most recent pre-deploy-fixes commit (the one that moved 8 runbooks to deprecated/)
-
-If branch is not `main`, the working tree is dirty, or commits are behind the remote, stop and reconcile before continuing.
-
-### 4.3 Pre-deploy fixes are present locally
-
-```bash
-cd "$REPO"
-
-echo "=== ceph-osd block (after pre-deploy fix): should NOT contain a 'storage:' line ==="
-grep -A 12 "^  ceph-osd:" bundle.yaml
-echo ""
-
-echo "=== Verify ceph-osd has no storage block ==="
-grep -A 12 "^  ceph-osd:" bundle.yaml | grep "^    storage:" \
-  && echo "[FAIL] storage block still present under ceph-osd" \
-  || echo "[OK] no storage block in ceph-osd"
-echo ""
-
-echo "=== expected-osd-count on ceph-mon: should be 4 ==="
-grep -A 8 "^  ceph-mon:" bundle.yaml | grep "expected-osd-count"
-echo ""
-
-echo "=== VIP grep: should return 12 ==="
-grep -cE "^[[:space:]]+vip: 10\.12\.4\." bundle.yaml
-echo ""
-
-echo "=== D-002 Vault row dedup: dedicated row remains, OS-core row no longer mentions vault ==="
-grep -c "Vault.*1.8/stable" docs/design-decisions.md
-grep "magnum, vault)" docs/design-decisions.md && echo "[FAIL] vault still in OS-core row" || echo "[OK] vault not in OS-core row"
-echo ""
-
-echo "=== D-014 repo path: should be OpenStack/openstack-caracal-ipv4 ==="
-grep -A 3 "^## D-014" docs/design-decisions.md | grep "Repo path:"
-echo ""
-
-echo "=== Deprecated runbooks moved: count should be 8 (or 9 with README) ==="
-ls runbooks/deprecated/ 2>/dev/null | wc -l
-```
-
-**Expected:**
-
-- ceph-osd block shows `options.osd-devices: /dev/vdb` and NO `storage:` block
-- `storage:` check under ceph-osd: prints `[OK] no storage block in ceph-osd`
-- `expected-osd-count: 4`
-- VIP grep returns 12
-- D-002: one `Vault.*1.8/stable` match; OS-core row does NOT mention vault
-- D-014: shows `OpenStack/openstack-caracal-ipv4`
-- `runbooks/deprecated/` contains 8 runbooks + 1 README
-
-If any of these don't match, the pre-deploy fixes did not land correctly. Stop and reconcile (re-pull, re-apply missing commits).
-
-### 4.4 Openrc files present and source cleanly
-
-```bash
-echo "=== admin openrc ==="
-if [ -f "$HOME/admin-openrc" ]; then
-  echo "[OK] $HOME/admin-openrc exists"
-  ( source "$HOME/admin-openrc"; \
-    env | grep -E "^OS_" | grep -v PASSWORD | sort )
-else
-  echo "[MISSING] $HOME/admin-openrc"
-fi
-
-echo ""
-echo "=== user1 openrc ==="
-if [ -f "$HOME/user1-openrc" ]; then
-  echo "[OK] $HOME/user1-openrc exists"
-  ( source "$HOME/user1-openrc"; \
-    env | grep -E "^OS_" | grep -v PASSWORD | sort )
-else
-  echo "[MISSING] $HOME/user1-openrc"
-fi
-```
-
-**Expected:**
-
-- Both files exist and contain OS_AUTH_URL, OS_USERNAME, OS_PROJECT_NAME, OS_USER_DOMAIN_NAME, OS_PROJECT_DOMAIN_NAME, OS_IDENTITY_API_VERSION, OS_REGION_NAME (and OS_PASSWORD, which is filtered out of the display).
-- The subshell-source pattern (`( source ...; env ... )`) prevents the openrc from polluting your interactive shell environment.
-
-**Note:** these openrc files target the **prior** Bobcat cloud (or whatever was last running). They will NOT work against the Caracal cloud until that cloud is up. They're verified here as a sanity check that the files exist for the operator. New openrc files will be generated as part of `v1-do-doc-09-tenant.md` (Batch D) using the new Caracal endpoints.
-
-### 4.5 Juju controller reachable
-
-```bash
-echo "=== Juju controllers ==="
-juju controllers
-echo ""
-echo "=== Current controller ==="
-juju show-controller 2>/dev/null | head -20
-echo ""
-echo "=== Models on current controller ==="
-juju models
-```
-
-**Expected:**
-
-- At least one controller is listed.
-- The controller should be in `Available` state.
-- The `openstack` model may or may not exist depending on whether the prior teardown completed. Per the Caracal_Rebuild handoff (2026-05-27 verification), it does not exist — that's expected post-teardown.
-
-If `juju controllers` errors, the controller is unreachable. Stop and resolve juju access before continuing.
-
-### 4.6 MAAS state of cloud-target VMs
-
-```bash
-echo "=== MAAS profile ==="
-maas list | head -5
-export MAAS_PROFILE=$(maas list | awk 'NR==1 {print $1}')
-echo "Using MAAS_PROFILE=$MAAS_PROFILE"
-echo ""
-
-echo "=== Cloud-target VMs (openstack0-3 + capi-mgmt): expect 5 in Ready ==="
-maas "$MAAS_PROFILE" machines read 2>/dev/null \
-  | python3 -c "
-import json, sys
-machines = json.load(sys.stdin)
-targets = ['openstack0', 'openstack1', 'openstack2', 'openstack3', 'capi-mgmt']
-print(f'{\"hostname\":<15} {\"status\":<15} {\"owner\":<15} {\"system_id\":<25}')
-print('-' * 70)
-seen = []
-for m in machines:
-    h = m.get('hostname', '')
-    if h in targets:
-        seen.append(h)
-        status = m.get('status_name', '')
-        owner = m.get('owner', '') or '(none)'
-        sid = m.get('system_id', '')
-        print(f'{h:<15} {status:<15} {owner:<15} {sid:<25}')
-missing = [t for t in targets if t not in seen]
-if missing:
-    print(f'\nMISSING from MAAS: {missing}')
-"
-```
-
-**Expected:**
-
-- All 5 hostnames appear: `openstack0`, `openstack1`, `openstack2`, `openstack3`, `capi-mgmt`
-- Status for all 5: `Ready`
-- Owner for all 5: `(none)` (unowned, ready for deploy)
-
-If any VM is not `Ready` or has an owner, stop. The teardown either did not complete or something has acquired the VMs since.
-
-### 4.7 Acknowledged "not done here" items
-
-This step prints a checklist for the operator to mentally acknowledge before proceeding:
-
-```bash
-cat <<'EOF'
-=== Items NOT done in this prep phase (acknowledged) ===
-
-[ ] NetBox imports — pinned for external NetBox-engineer review; not blocking
-    v1 deploy. NetBox state is whatever the engineer has set up.
-
-[ ] KVM snapshots — per D-017, every cycle is a full rebuild; no rollback
-    target needed for this deploy. Pre-existing KVM-level snapshots (from
-    prior cycles) remain on disk as a safety net but are not refreshed here.
-
-[ ] Vault unseal key backup — prior Vault keys are accepted lost per the
-    Caracal_Rebuild handoff. Caracal deploy will run `vault operator init`
-    fresh in v1-do-doc-05.
-
-[ ] Graceful CAPI workload teardown — D-018 supersedes D-013; teardown is
-    MAAS-release-direct, not graceful. The teardown that happened pre-2026-05-27
-    used the D-018 path.
-
-[ ] juju export-bundle / juju status capture — the cloud is currently down,
-    so there is nothing to capture. On future cycles where the cloud is up
-    pre-teardown, those captures happen in runbook 01 Phase A.
-
-Proceed only if every item above is acknowledged.
-EOF
-```
-
----
-
-
-## 5. Acceptance criteria — go/no-go for v1-do-doc-02
-
-The following must all be true before proceeding:
-
-- [ ] §4.1: Identity correct, disk space sufficient
-- [ ] §4.2: Repo cloned at `$HOME/openstack-caracal-ipv4`, on `main`, clean, with credential helper configured
-- [ ] §4.3: All six pre-deploy fixes verified present in the local checkout (storage block removed, expected-osd-count=4, VIPs=12, D-002 dedup, D-014 path, 8 deprecated files)
-- [ ] §4.4: Both openrc files exist and source cleanly (content of openrc not yet validated against new cloud — that's v1-do-doc-04+)
-- [ ] §4.5: Juju controller reachable
-- [ ] §4.6: All 5 cloud-target VMs MAAS-Ready, unowned
-- [ ] §4.7: "Not done here" items all acknowledged
-
-If all checked, proceed to `v1-do-doc-02-pki.md`.
-
----
-
-
-## 6. Roosevelt deltas (forward-look)
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Jumphost | `vopenstack-jesse` KVM VM | TBD — likely a bastion VM or operator workstation |
-| Cloud target VMs | 5 KVM VMs in libvirt on this jumphost | bare-metal MAAS-managed servers at the Roosevelt site |
-| MAAS profile name | Single profile on the local MAAS | May be a Roosevelt-specific MAAS instance |
-| openrc files | Manually-maintained `$HOME/admin-openrc`, `$HOME/user1-openrc` | Vault-issued or app-credential-based with rotation |
-| KVM snapshots | Optional safety net | N/A — equivalent is MAAS re-deploy |
-| Vault unseal keys | Generated fresh each Caracal cycle | Managed via Vault's own backup mechanism |
-
----
-
-
-## 7. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-27 | Document created. Replaces stale runbook 00 (D-013 graceful teardown + capi-mgmt preservation, both superseded). | Batch A drafting |
diff --git a/runbooks/v1-do-doc-02-pki.md b/runbooks/v1-do-doc-02-pki.md
deleted file mode 100644
index 15f17c1..0000000
--- a/runbooks/v1-do-doc-02-pki.md
+++ /dev/null
@@ -1,687 +0,0 @@
-# v1 Do-Document 02 — Octavia LBaaS PKI Overlay Generation
-
-**Status:** Second execution document of Batch A. Generates local-only crypto material (no cloud touched). Runs after `v1-do-doc-01-prep.md` (state check). Runs before `runbooks/01-destroy-model.md` re-verification (Batch B doc 03 will point to that runbook).
-
-**Replaces:** `runbooks/deprecated/01a-octavia-pki-generation.md` (which had a buggy VIP-count grep pattern, an obsolete §12 bundle-housekeeping block, a self-reference off-by-one, and `exit 1` blocks in operator-facing context that violate the no-exit-in-pasted-shell rule).
-
-**Cross-references:**
-
-- D-007 (Magnum / Octavia inclusion) — Octavia bundle integration
-- Bundle `octavia.options` PKI material section
-- `overlays/octavia-pki.yaml` (gitignored — output of this document)
-- Workstream 3a decision (2026-05-22): generate fresh, EC P-384 CAs, overlay-file approach
-
----
-
-
-## 1. Purpose & scope
-
-Generate a complete two-tier PKI for Charmed Octavia's amphora load-balancer trust domain. The output is a single overlay file (`overlays/octavia-pki.yaml`) that the v1-do-doc-04 deploy step will pass to `juju deploy --overlay`.
-
-Octavia uses two CAs:
-
-- **Issuing CA** — signs each amphora's server certificate at LB-creation time. Octavia receives the private key and passphrase (so it can sign at runtime).
-- **Controller CA** — trust anchor for connections **from** the Octavia controller to the amphorae. Octavia receives only the cert (no key needed at runtime; signing of controller certs is a humans-only rotation event).
-
-Plus one controller certificate (cert + key bundled) signed by the Controller CA.
-
-Five charm options on the `octavia` application consume the artifacts:
-
-| Charm option | Content | Format |
-|---|---|---|
-| `lb-mgmt-issuing-cacert` | Issuing CA certificate | base64-encoded PEM |
-| `lb-mgmt-issuing-ca-private-key` | Issuing CA encrypted private key | base64-encoded PEM (already encrypted with passphrase) |
-| `lb-mgmt-issuing-ca-key-passphrase` | Issuing CA key passphrase | plain string (NOT base64) |
-| `lb-mgmt-controller-cacert` | Controller CA certificate | base64-encoded PEM |
-| `lb-mgmt-controller-cert` | Controller cert + key, concatenated | base64-encoded PEM bundle |
-
-**Scope:** v1 testcloud (VR0 DC0 Omega Cloud). Roosevelt deltas in §15.
-
-**Out of scope:**
-
-- Octavia API TLS (issued by Vault via `octavia:certificates` relation in the bundle; separate concern)
-- Rotation procedure (deferred to Roosevelt runbook; testcloud rotation pointer in §16)
-
----
-
-
-## 2. Decisions captured
-
-Per workstream 3a sign-off (2026-05-22):
-
-| Decision | Choice | Roosevelt parallel |
-|---|---|---|
-| Cert provenance | Generate fresh (no Bobcat-backup copy) | Vault PKI engine |
-| CA key algorithm | EC P-384 | EC P-384 (Vault root) |
-| Controller cert algorithm | EC P-256 | EC P-256 |
-| CA validity | 10 years | 5-year intermediate, Vault-rotated |
-| Controller cert validity | 2 years | 90 days, auto-rotated |
-| Distribution method | Juju overlay file (gitignored) | Vault-injected at deploy |
-| Storage path on jumphost | `$HOME/octavia-pki/` | Vault PKI mounts |
-| Passphrase strength | 32 random bytes, base64-encoded (44 chars) | Vault-generated |
-
-**Naming convention:**
-
-- Issuing CA CN: `VR0 DC0 Omega Cloud Octavia Issuing CA`
-- Controller CA CN: `VR0 DC0 Omega Cloud Octavia Controller CA`
-- Controller cert CN: `octavia-controller.omega.dc0.vr0.cloud.neumatrix.local`
-- Controller cert SANs: above CN, plus `octavia.omega.dc0.vr0.cloud.neumatrix.local`, plus `10.12.4.233` (the Octavia API VIP)
-- Organization (O): `Neumatrix`
-
----
-
-
-## 3. Prerequisites
-
-| Prereq | Verification |
-|---|---|
-| `v1-do-doc-01-prep.md` completed cleanly | Manual confirmation; all §5 acceptance items checked |
-| Executor on jumphost `vopenstack-jesse` as `jessea123` | `hostname && id -un` |
-| `openssl` version 3.x or later | `openssl version` |
-| `$HOME` writable | `test -w "$HOME" && echo OK` |
-| Repository cloned at `$HOME/openstack-caracal-ipv4` | Verified in v1-do-doc-01 §4.2 |
-| Repository on `main`, clean, up to date | Verified in v1-do-doc-01 §4.2 |
-| Pre-deploy fixes commits all landed | Verified in v1-do-doc-01 §4.3 |
-
-**Shell context — paste once at start:**
-
-```bash
-export REPO="$HOME/openstack-caracal-ipv4"
-echo "REPO=$REPO"
-test -d "$REPO/.git" && echo "[OK] repo present" || echo "[FAIL] repo missing"
-cd "$REPO"
-```
-
-**Verify pre-deploy fixes are present (smoketest against the corrected grep pattern):**
-
-```bash
-echo "VIP grep (corrected pattern, expect 12):"
-grep -cE "^[[:space:]]+vip: 10\.12\.4\." "$REPO/bundle.yaml"
-```
-
-If this returns anything other than `12`, the bundle pre-deploy fix did not land — stop and reconcile before proceeding.
-
-> **Note on the grep pattern:** the deprecated runbook 01a used `^ vip: 10.12.4.` with a single literal space, which matches zero lines because the bundle's `vip:` entries live inside `options:` blocks indented six spaces deep. The corrected pattern `^[[:space:]]+vip: 10\.12\.4\.` matches any leading whitespace and escapes the dot literals.
-
----
-
-
-## 4. Pre-flight: gitignore patch (DO THIS FIRST)
-
-**Critical:** the `.gitignore` patch must be in `main` BEFORE any private key material exists on disk in the workspace. This minimizes the race window for an accidental commit.
-
-The current `.gitignore` already catches `*.key`, `*.crt`, `*.pem` via wildcards, but does NOT catch `overlays/octavia-pki.yaml` (a `.yaml` file) or `passphrase.txt` (a `.txt` file). This step adds the missing patterns.
-
-```bash
-cd "$REPO"
-
-# Idempotent patch — only add the block if the overlay path is not already protected
-if ! grep -q "^overlays/octavia-pki.yaml" .gitignore; then
-  cat >> .gitignore <<'EOF'
-
-# Octavia PKI artifacts — never commit
-overlays/octavia-pki.yaml
-octavia-pki/
-passphrase.txt
-EOF
-  echo "[OK] .gitignore patched"
-else
-  echo "[OK] .gitignore already has overlay protection (no change)"
-fi
-
-# Review the diff (will be empty if already patched)
-git diff .gitignore
-```
-
-If the diff shows the new block, commit and push it before generating any keys:
-
-```bash
-git add .gitignore
-git commit -m "gitignore: octavia PKI artifacts and overlay (v1-do-doc-02 §4)"
-git push origin main
-```
-
-**Verify the gitignore is effective** (this is a safety smoketest — touch a fake overlay file and ensure git ignores it):
-
-```bash
-touch overlays/octavia-pki.yaml
-STATUS=$(git status --short overlays/octavia-pki.yaml)
-rm overlays/octavia-pki.yaml
-
-if [ -z "$STATUS" ]; then
-  echo "[OK] gitignore working — overlay file does not show as untracked"
-else
-  echo "[FAIL] gitignore not effective — git sees:"
-  echo "  $STATUS"
-  echo "Stop here. Fix .gitignore syntax before generating any secrets."
-fi
-```
-
-If `[FAIL]`, do not proceed. The error means a generated PKI overlay could be accidentally committed.
-
----
-
-
-## 5. Workspace setup
-
-```bash
-WORKDIR="$HOME/octavia-pki"
-mkdir -p "$WORKDIR"/{issuing-ca,controller-ca,controller,overlay-build}
-chmod 700 "$WORKDIR"
-cd "$WORKDIR"
-echo "Working in: $WORKDIR"
-ls -la "$WORKDIR"
-```
-
-Resulting layout:
-
-```
-$HOME/octavia-pki/
-├── issuing-ca/           # passphrase.txt, .key.enc, .cert.pem
-├── controller-ca/        # passphrase.txt, .key.enc, .cert.pem
-├── controller/           # .key, .csr, .cert.pem, .bundle.pem, .cnf
-└── overlay-build/        # base64 intermediates → consumed by §10
-```
-
----
-
-
-## 6. Generate Issuing CA
-
-EC P-384 key encrypted with random 32-byte passphrase. Self-signed cert, 10-year validity.
-
-```bash
-cd "$WORKDIR/issuing-ca"
-
-# Generate passphrase (no trailing newline — required for clean YAML embedding)
-openssl rand -base64 32 | tr -d '\n' > passphrase.txt
-chmod 600 passphrase.txt
-
-# Sanity-check length (no exit; operator-decided)
-PASS_LEN=$(wc -c < passphrase.txt)
-if [ "$PASS_LEN" -ne 44 ]; then
-  echo "[FAIL] passphrase length is $PASS_LEN bytes, expected 44 — investigate before continuing"
-else
-  echo "[OK] passphrase length: $PASS_LEN bytes"
-fi
-```
-
-> **On the length check:** `openssl rand -base64 32` produces 32 random bytes encoded as base64. Base64 encoding of 32 bytes is 44 characters (including two `=` padding chars). `tr -d '\n'` strips the trailing newline that openssl adds. The resulting file is exactly 44 bytes — verified by `wc -c`.
-
-If the length is wrong, stop and re-run the `openssl rand` line before proceeding. Do NOT continue with a wrong-length passphrase — it will silently break the overlay parsing later.
-
-```bash
-# Generate EC P-384 private key, encrypted with passphrase
-openssl genpkey -algorithm EC \
-  -pkeyopt ec_paramgen_curve:P-384 \
-  -aes-256-cbc \
-  -pass file:passphrase.txt \
-  -out issuing-ca.key.enc
-chmod 600 issuing-ca.key.enc
-
-# Self-sign cert (10 years, SHA-384)
-openssl req -new -x509 -sha384 \
-  -key issuing-ca.key.enc \
-  -passin file:passphrase.txt \
-  -days 3650 \
-  -subj "/CN=VR0 DC0 Omega Cloud Octavia Issuing CA/O=Neumatrix" \
-  -out issuing-ca.cert.pem
-
-# Verify (no exit; visible output for operator)
-echo "=== Issuing CA verification ==="
-openssl x509 -in issuing-ca.cert.pem -noout -dates -subject
-openssl verify -CAfile issuing-ca.cert.pem issuing-ca.cert.pem
-# Expect: issuing-ca.cert.pem: OK
-
-ls -la
-```
-
----
-
-
-## 7. Generate Controller CA
-
-Identical pattern; different CN.
-
-```bash
-cd "$WORKDIR/controller-ca"
-
-openssl rand -base64 32 | tr -d '\n' > passphrase.txt
-chmod 600 passphrase.txt
-
-PASS_LEN=$(wc -c < passphrase.txt)
-if [ "$PASS_LEN" -ne 44 ]; then
-  echo "[FAIL] passphrase length is $PASS_LEN bytes, expected 44 — investigate before continuing"
-else
-  echo "[OK] passphrase length: $PASS_LEN bytes"
-fi
-
-openssl genpkey -algorithm EC \
-  -pkeyopt ec_paramgen_curve:P-384 \
-  -aes-256-cbc \
-  -pass file:passphrase.txt \
-  -out controller-ca.key.enc
-chmod 600 controller-ca.key.enc
-
-openssl req -new -x509 -sha384 \
-  -key controller-ca.key.enc \
-  -passin file:passphrase.txt \
-  -days 3650 \
-  -subj "/CN=VR0 DC0 Omega Cloud Octavia Controller CA/O=Neumatrix" \
-  -out controller-ca.cert.pem
-
-echo "=== Controller CA verification ==="
-openssl x509 -in controller-ca.cert.pem -noout -dates -subject
-openssl verify -CAfile controller-ca.cert.pem controller-ca.cert.pem
-# Expect: controller-ca.cert.pem: OK
-
-ls -la
-```
-
-**Why Controller CA's key is encrypted even though Octavia never uses it:** the Controller CA key is needed for future rotations of the controller cert. Encrypting it (with its own passphrase, separate from Issuing CA's) is defense in depth — if the jumphost is compromised, the key still requires the passphrase to be useful for forging controller certs.
-
----
-
-
-## 8. Generate Controller certificate
-
-EC P-256 key (no encryption — Octavia must read it at startup), CSR with SAN extensions, signed by Controller CA, 2-year validity.
-
-```bash
-cd "$WORKDIR/controller"
-
-# Generate unencrypted EC P-256 key
-openssl genpkey -algorithm EC \
-  -pkeyopt ec_paramgen_curve:P-256 \
-  -out controller.key
-chmod 600 controller.key
-
-# CSR config with SAN extensions
-cat > controller.cnf <<'EOF'
-[req]
-distinguished_name = req_distinguished_name
-req_extensions = v3_req
-prompt = no
-
-[req_distinguished_name]
-CN = octavia-controller.omega.dc0.vr0.cloud.neumatrix.local
-O = Neumatrix
-
-[v3_req]
-keyUsage = critical, digitalSignature, keyEncipherment
-extendedKeyUsage = clientAuth, serverAuth
-subjectAltName = @alt_names
-
-[alt_names]
-DNS.1 = octavia-controller.omega.dc0.vr0.cloud.neumatrix.local
-DNS.2 = octavia.omega.dc0.vr0.cloud.neumatrix.local
-IP.1 = 10.12.4.233
-EOF
-
-# Generate CSR
-openssl req -new -sha256 \
-  -key controller.key \
-  -config controller.cnf \
-  -out controller.csr
-
-# Sign with Controller CA (2 years)
-openssl x509 -req -sha256 \
-  -in controller.csr \
-  -CA "$WORKDIR/controller-ca/controller-ca.cert.pem" \
-  -CAkey "$WORKDIR/controller-ca/controller-ca.key.enc" \
-  -passin file:"$WORKDIR/controller-ca/passphrase.txt" \
-  -CAcreateserial \
-  -days 730 \
-  -extfile controller.cnf \
-  -extensions v3_req \
-  -out controller.cert.pem
-
-# Bundle cert + key (the lb-mgmt-controller-cert option expects both in one PEM)
-cat controller.cert.pem controller.key > controller.bundle.pem
-chmod 600 controller.bundle.pem
-```
-
-**Verify the chain and SAN:**
-
-```bash
-echo "=== Chain verification ==="
-openssl verify -CAfile "$WORKDIR/controller-ca/controller-ca.cert.pem" controller.cert.pem
-# Expect: controller.cert.pem: OK
-
-echo ""
-echo "=== SAN extensions ==="
-openssl x509 -in controller.cert.pem -noout -ext subjectAltName
-# Expect:
-#     DNS:octavia-controller.omega.dc0.vr0.cloud.neumatrix.local,
-#     DNS:octavia.omega.dc0.vr0.cloud.neumatrix.local,
-#     IP Address:10.12.4.233
-
-echo ""
-echo "=== Validity ==="
-openssl x509 -in controller.cert.pem -noout -dates
-# Expect: notAfter ~2 years from today
-
-echo ""
-echo "=== Bundle integrity (cert and key match) ==="
-# Use $HOME paths (not /tmp) per snap-confinement convention used elsewhere in repo
-openssl x509 -in controller.bundle.pem -noout -pubkey > "$WORKDIR/controller/.cert.pub"
-openssl pkey -in controller.bundle.pem -pubout > "$WORKDIR/controller/.key.pub"
-if diff -q "$WORKDIR/controller/.cert.pub" "$WORKDIR/controller/.key.pub" >/dev/null; then
-  echo "[OK] bundle cert/key match"
-else
-  echo "[FAIL] bundle cert/key DO NOT match — investigate before continuing"
-fi
-rm -f "$WORKDIR/controller/.cert.pub" "$WORKDIR/controller/.key.pub"
-```
-
----
-
-
-## 9. Final chain verification
-
-A standalone block to confirm the full chain is sound before consuming for Octavia. All three "verify" lines must show `: OK`. If any do not, stop and investigate before proceeding.
-
-```bash
-cd "$WORKDIR"
-
-echo "=== Issuing CA ==="
-openssl x509 -in issuing-ca/issuing-ca.cert.pem -noout -subject -dates
-openssl verify -CAfile issuing-ca/issuing-ca.cert.pem issuing-ca/issuing-ca.cert.pem
-
-echo ""
-echo "=== Controller CA ==="
-openssl x509 -in controller-ca/controller-ca.cert.pem -noout -subject -dates
-openssl verify -CAfile controller-ca/controller-ca.cert.pem controller-ca/controller-ca.cert.pem
-
-echo ""
-echo "=== Controller cert ==="
-openssl x509 -in controller/controller.cert.pem -noout -subject -dates
-openssl verify -CAfile controller-ca/controller-ca.cert.pem controller/controller.cert.pem
-```
-
-Operator-visible check: the three `verify` lines must all end with `: OK`.
-
----
-
-
-## 10. Base64-encode artifacts
-
-Each base64 file is a single line (no wrapping); each becomes one YAML value.
-
-```bash
-cd "$WORKDIR/overlay-build"
-
-# Issuing CA cert (base64)
-base64 -w0 "$WORKDIR/issuing-ca/issuing-ca.cert.pem" > issuing-cacert.b64
-
-# Issuing CA private key (already encrypted PEM → base64)
-base64 -w0 "$WORKDIR/issuing-ca/issuing-ca.key.enc" > issuing-ca-private-key.b64
-
-# Controller CA cert
-base64 -w0 "$WORKDIR/controller-ca/controller-ca.cert.pem" > controller-cacert.b64
-
-# Controller cert + key bundle
-base64 -w0 "$WORKDIR/controller/controller.bundle.pem" > controller-cert.b64
-
-# Sanity-check sizes (expect 500-2000 chars each)
-wc -c *.b64
-```
-
----
-
-
-## 11. Assemble the overlay file
-
-```bash
-# Read each artifact into shell variables
-ISSUING_CACERT=$(cat "$WORKDIR/overlay-build/issuing-cacert.b64")
-ISSUING_CA_KEY=$(cat "$WORKDIR/overlay-build/issuing-ca-private-key.b64")
-ISSUING_CA_PASS=$(cat "$WORKDIR/issuing-ca/passphrase.txt")
-CONTROLLER_CACERT=$(cat "$WORKDIR/overlay-build/controller-cacert.b64")
-CONTROLLER_CERT=$(cat "$WORKDIR/overlay-build/controller-cert.b64")
-
-# Assemble overlay (passphrase is YAML-quoted; cert blobs are not — they're
-# guaranteed-safe base64 without special chars)
-mkdir -p "$REPO/overlays"
-cat > "$REPO/overlays/octavia-pki.yaml" <<EOF
-# Octavia LBaaS PKI overlay — SENSITIVE — NEVER COMMIT
-# Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) UTC
-# Source: docs/v1-do-doc-02-pki.md
-# Issuing CA, Controller CA, Controller cert all generated fresh per workstream 3a.
-#
-# This file is gitignored. If you see it staged or committed, .gitignore is broken.
-
-applications:
-  octavia:
-    options:
-      lb-mgmt-issuing-cacert: ${ISSUING_CACERT}
-      lb-mgmt-issuing-ca-private-key: ${ISSUING_CA_KEY}
-      lb-mgmt-issuing-ca-key-passphrase: "${ISSUING_CA_PASS}"
-      lb-mgmt-controller-cacert: ${CONTROLLER_CACERT}
-      lb-mgmt-controller-cert: ${CONTROLLER_CERT}
-EOF
-
-chmod 600 "$REPO/overlays/octavia-pki.yaml"
-
-# Unset the shell variables (they held key material)
-unset ISSUING_CACERT ISSUING_CA_KEY ISSUING_CA_PASS CONTROLLER_CACERT CONTROLLER_CERT
-```
-
-**Validate the overlay parses as YAML:**
-
-```bash
-python3 - <<PY
-import yaml
-with open("$REPO/overlays/octavia-pki.yaml") as f:
-    d = yaml.safe_load(f)
-o = d["applications"]["octavia"]["options"]
-print("Keys present:", sorted(o.keys()))
-print("All values non-empty:", all(v for v in o.values()))
-PY
-```
-
-Expected: 5 keys listed; `All values non-empty: True`.
-
-**Confirm gitignore is doing its job:**
-
-```bash
-cd "$REPO"
-git status --short
-echo ""
-echo "If overlays/octavia-pki.yaml appears above as ?? (untracked), STOP."
-echo "Shred the file with: shred -uvz overlays/octavia-pki.yaml"
-echo "Fix .gitignore and regenerate (§4 + §6-11)."
-```
-
-The overlay file must NOT show up in `git status --short`. If it does, the gitignore patch in §4 did not stick.
-
----
-
-
-## 12. Sensitive-file backup
-
-The Issuing CA private key plus its passphrase are the crown jewels of the LB trust domain. Loss → cannot sign new amphora certs (LBs gradually break). Exposure → attacker can forge amphora identities and intercept tenant LB traffic.
-
-**Minimum backup for testcloud:**
-
-```bash
-cd "$HOME"
-BACKUP_NAME="octavia-pki-backup-$(date +%Y%m%d-%H%M%S).tar.gz"
-
-tar -czf "$BACKUP_NAME" -C "$HOME" octavia-pki/
-
-# Encrypt with strong symmetric cipher (will prompt for passphrase interactively)
-gpg --symmetric --cipher-algo AES256 --output "${BACKUP_NAME}.gpg" "$BACKUP_NAME"
-
-# Shred the unencrypted tar only once gpg has produced the encrypted output (the .gpg file is the asset of record)
-if [ -f "${BACKUP_NAME}.gpg" ]; then
-  shred -uvz "$BACKUP_NAME"
-  ls -la "${BACKUP_NAME}.gpg"
-  echo "[OK] backup created and unencrypted tar shredded"
-else
-  echo "[FAIL] gpg encryption did not produce ${BACKUP_NAME}.gpg"
-  echo "       Unencrypted tar still present at: $BACKUP_NAME"
-  echo "       Investigate gpg failure before continuing."
-fi
-```
-
-**Move `${BACKUP_NAME}.gpg` off-host** to your chosen secrets store (admin workstation encrypted drive, password-manager attachment, dedicated secrets vault). Do not leave it on the jumphost long-term — single point of compromise.
-
-**Roosevelt note:** Vault PKI engine stores all of this; no manual backup required. This procedure is testcloud-only.
-
----
-
-
-## 13. Cleanup of intermediates
-
-After successful deploy and post-deploy verification (§14), shred files that are not needed for future rotation:
-
-```bash
-# Optional: shred the base64 intermediates (regeneratable from PEM sources)
-shred -uvz "$WORKDIR/overlay-build/"*.b64
-rmdir "$WORKDIR/overlay-build"
-
-# Optional: shred the CSR (regeneratable if needed)
-shred -uvz "$WORKDIR/controller/controller.csr"
-
-# DO NOT shred any of the following — they are needed for future operations:
-#   - issuing-ca/{issuing-ca.cert.pem, issuing-ca.key.enc, passphrase.txt}
-#   - controller-ca/{controller-ca.cert.pem, controller-ca.key.enc, passphrase.txt}
-#   - controller/{controller.key, controller.cert.pem, controller.bundle.pem, controller.cnf}
-#
-# Specifically:
-#   - Issuing CA artifacts: required for signing new amphoras (Octavia uses them at runtime)
-#   - Controller CA artifacts: required for signing new controller certs (rotation)
-#   - Controller cert/key: required to repopulate the overlay if jumphost is rebuilt
-```
-
-This step runs AFTER §14 verification has confirmed the overlay was consumed correctly.
-
----
-
-
-## 14. Post-deploy verification
-
-After `v1-do-doc-04-deploy.md` completes (`juju deploy` with the overlay), verify Octavia is healthy and the PKI plumbing works. This section is referenced from §13 above as the verification gate.
-
-```bash
-# Octavia charm active/idle
-juju status octavia
-# Expect: octavia/0 active idle
-
-# Octavia services running
-juju ssh octavia/0 -- sudo systemctl is-active octavia-api octavia-worker octavia-housekeeping
-# Expect: 3x "active"
-
-# Confirm PKI files landed on the unit
-juju ssh octavia/0 -- sudo ls -la /etc/octavia/certs/
-# Expect: server_ca.cert.pem, server_ca.key.pem, client_ca.cert.pem, client.cert-and-key.pem
-# (filenames are charm-controlled; presence is what matters)
-
-# Confirm Octavia can use them — verbose health-check from the API
-juju ssh octavia/0 -- sudo journalctl -u octavia-api --since "5 minutes ago" \
-  | grep -iE "(cert|ssl|tls|amphora)" | head -20
-# Expect: no errors related to cert loading
-```
-
-**Smoketest — create a test LB once amphora image is available:**
-
-```bash
-# After octavia-diskimage-retrofit has populated Glance with the amphora image,
-# and the LBaaS Mgmt network is wired (these are downstream deploy steps),
-# a test LB creation exercises the full PKI chain:
-
-source "$HOME/admin-openrc"
-openstack loadbalancer create --name pki-smoketest --vip-subnet-id <provider-subnet>
-
-# Watch for amphora spawn (3-5 minutes typical)
-watch -n5 'openstack loadbalancer show pki-smoketest'
-# Wait for: provisioning_status=ACTIVE, operating_status=ONLINE
-
-# Octavia-worker log should show successful amphora handshake (signed by Issuing CA,
-# trusted via Controller CA):
-juju ssh octavia/0 -- sudo journalctl -u octavia-worker --since "10 minutes ago" \
-  | grep -iE "(amphora|cert)" | tail -20
-# Expect: "amphora <UUID> connection established" or similar
-# Expect: no TLS handshake errors, no cert validation errors
-
-# Cleanup the smoketest LB
-openstack loadbalancer delete pki-smoketest --cascade
-```
-
-If amphora handshake fails with cert errors, the most likely causes are:
-
-1. **SAN mismatch** — the controller's connection to amphora uses the cert's CN/SAN; verify the controller cert SAN (§8) covers all addresses Octavia uses to reach amphorae.
-2. **Bundle/key mismatch** — `lb-mgmt-controller-cert` bundle should contain BOTH the cert and the matching private key; if they're for different keys, handshake fails. (Verified in §8 with the pubkey diff.)
-3. **Encrypted Issuing CA key + wrong passphrase** — verify the passphrase string in the overlay (§11) matches what was used at generation (§6).
-
----
-
-
-## 15. Roosevelt deltas (forward-look)
-
-When this procedure is adapted for Roosevelt bare-metal deploy:
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Issuing CA root | Self-signed | Intermediate signed by Vault root CA |
-| CA storage | Filesystem on jumphost | Vault PKI engine, encrypted at rest |
-| Controller cert validity | 2 years | 90 days |
-| Rotation | Manual (this document re-run) | Automated via Vault + cron + bundle redeploy |
-| Backup | gpg tarball, off-host | Vault's own backup mechanism |
-| Amphora image signing | Out of scope for v1 | Image signed by Vault PKI as well |
-| Procedure file | `runbooks/v1-do-doc-02-pki.md` | New runbook in Roosevelt repo |
-
-The procedure structure (generate Issuing CA → Controller CA → Controller cert → encode → overlay → backup → deploy) remains identical. Roosevelt just sources the CA root from Vault instead of self-signing.
-
----
-
-
-## 16. Rotation/renewal pointer
-
-For testcloud, the 2-year controller cert and 10-year CAs are intentionally "set and forget" — they will outlive the cloud at this scale.
-
-If rotation IS needed before testcloud teardown (e.g., a key leak event), the re-run procedure is:
-
-1. Generate new Controller cert signed by **existing** Controller CA (re-run §8-9 only).
-2. Re-encode the new bundle and regenerate the overlay (§10-11) with the new Controller cert; leave all other values unchanged.
-3. `juju config octavia lb-mgmt-controller-cert=<new-base64>` (single-option update; does not require full bundle redeploy).
-4. Octavia services may need a restart: `juju ssh octavia/0 -- sudo systemctl restart octavia-api octavia-worker octavia-housekeeping`.
-5. Existing amphorae will need to reconnect using the new cert; in-flight LBs may briefly drop. This is acceptable for a security-event rotation.
-
-For Roosevelt, this whole procedure is replaced by Vault automated rotation.
-
----
-
-
-## 17. Acceptance criteria — go/no-go for next step
-
-Before proceeding to Batch B (`v1-do-doc-03-destroy.md`):
-
-- [ ] §4 .gitignore patch applied and effective (overlay file is ignored)
-- [ ] §6 Issuing CA generated; cert verifies OK; passphrase is 44 bytes
-- [ ] §7 Controller CA generated; cert verifies OK; passphrase is 44 bytes
-- [ ] §8 Controller cert generated and signed; chain verifies OK; SAN extensions present; bundle cert/key match
-- [ ] §9 Final chain verification: all three `verify` lines show `: OK`
-- [ ] §10 Four base64 artifacts produced
-- [ ] §11 Overlay file written to `$REPO/overlays/octavia-pki.yaml`; parses as YAML; 5 non-empty option values
-- [ ] §11 `git status --short` does NOT show the overlay file
-- [ ] §12 Encrypted backup created and unencrypted tar shredded; backup moved off-host
-- [ ] §13 deferred until after the deploy step and §14 verification
-
-If all checked, the overlay is ready for `v1-do-doc-04-deploy.md` (Batch B). The overlay is consumed by the deploy command:
-
-```
-juju deploy ./bundle.yaml --overlay overlays/octavia-pki.yaml --trust
-```
-
-(Note: only one overlay reference. The deprecated `overlays/vr0-dc0-testcloud.yaml` placeholder is not used; the bundle has its testcloud values inline.)
-
----
-
-
-## 18. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-27 | Document created from `runbooks/deprecated/01a-octavia-pki-generation.md` with the following fixes: $REPO path corrected to `$HOME/openstack-caracal-ipv4`; §3 VIP-count grep corrected to `^[[:space:]]+vip:` pattern; old §12 (bundle housekeeping — already done) removed; §13/§14 self-reference fixed; operator-facing `exit 1` blocks replaced with non-exiting `[FAIL]` reports; intermediate diff files moved out of `/tmp` and into `$WORKDIR`. | Batch A drafting |
diff --git a/runbooks/v1-do-doc-03-destroy.md b/runbooks/v1-do-doc-03-destroy.md
deleted file mode 100644
index 3f69d77..0000000
--- a/runbooks/v1-do-doc-03-destroy.md
+++ /dev/null
@@ -1,219 +0,0 @@
-# v1 Do-Document 03 — Teardown / Pre-Deploy Cloud State
-
-**Status:** First execution document of Batch B. Conditional execution — current cloud state determines whether to skip or run the existing teardown runbook.
-
-**Position in sequence:** Runs after `v1-do-doc-02-pki.md` (overlay generated). Runs before `v1-do-doc-04-deploy.md` (juju deploy).
-
-**Type:** Thin pointer to the still-authoritative `runbooks/01-destroy-model.md`. This doc adds the state-detection and routing logic; the destructive procedure itself lives in the existing runbook.
-
-**Cross-references:**
-
-- D-017 (CAPI bootstrap cluster lifecycle) — every cycle is a full rebuild
-- D-018 (teardown strategy) — skip graceful, release MAAS directly
-- `runbooks/01-destroy-model.md` — the authoritative procedure (Phases A→D)
-
----
-
-
-## 1. Purpose & scope
-
-This document determines whether the existing cloud needs to be torn down before deploy, and routes to `runbooks/01-destroy-model.md` if so.
-
-There are exactly three possible states going into this step:
-
-| State | Description | Routing |
-|---|---|---|
-| **Clean** | Cloud already down. All 5 VMs MAAS-Ready, no `openstack` Juju model. | Skip to §3 verification → §4 acceptance |
-| **Dirty** | Cloud up (any unit deployed, any Juju model present). | Execute runbook 01-destroy-model.md in full → return to §3 |
-| **Partial** | Some intermediate state (model destroying, machines not all Ready, etc.). | Execute runbook 01 Phase D remediation; resolve before continuing |
-
-**For the current first Caracal cycle (post-2026-05-27):** state is Clean (verified in `v1-do-doc-01-prep.md` §4.5 + §4.6). This document is effectively a one-step verification.
-
-**For future rebuild cycles:** state will be Dirty after each Caracal deploy. This document then routes through the full runbook 01 procedure.
-
-**Out of scope:**
-
-- The procedure itself — owned by `runbooks/01-destroy-model.md`
-- KVM snapshot capture (per D-017, full rebuild every cycle; pre-existing KVM snapshots remain on disk but are not refreshed)
-
----
-
-
-## 2. State detection
-
-```bash
-export REPO="$HOME/openstack-caracal-ipv4"
-cd "$REPO"
-
-echo "=== Juju model state ==="
-if juju models 2>/dev/null | grep -qE "^openstack(\*| )"; then
-  JUJU_STATE="present"
-  juju models | grep "^openstack" || true
-else
-  JUJU_STATE="absent"
-  echo "[OK] no 'openstack' model on current controller"
-fi
-echo "JUJU_STATE=$JUJU_STATE"
-echo ""
-
-echo "=== MAAS machine state ==="
-export MAAS_PROFILE=$(maas list 2>/dev/null | awk 'NR==1 {print $1}')
-if [ -z "$MAAS_PROFILE" ]; then
-  echo "[FAIL] no MAAS profile logged in. Run 'maas login <profile> <url> <key>' first."
-fi
-
-READY_COUNT=$(maas "$MAAS_PROFILE" machines read 2>/dev/null \
-  | python3 -c "
-import json, sys
-machines = json.load(sys.stdin)
-targets = ['openstack0', 'openstack1', 'openstack2', 'openstack3', 'capi-mgmt']
-ready = [m for m in machines if m.get('hostname') in targets and m.get('status_name') == 'Ready']
-print(len(ready))
-")
-
-echo "Cloud-target VMs in Ready state: $READY_COUNT / 5"
-# Empty READY_COUNT means the MAAS query failed; route it to [UNKNOWN], not [PARTIAL].
-case "$READY_COUNT" in
-  "") MAAS_STATE="unknown" ;;
-  5)  MAAS_STATE="all_ready" ;;
-  0)  MAAS_STATE="all_deployed_or_other" ;;
-  *)  MAAS_STATE="partial" ;;
-esac
-echo "MAAS_STATE=$MAAS_STATE"
-echo ""
-
-echo "=== Routing decision ==="
-case "$JUJU_STATE:$MAAS_STATE" in
-  "absent:all_ready")
-    echo "[CLEAN] Cloud is already torn down. Skip to §3 verification."
-    ;;
-  "present:all_ready")
-    echo "[ANOMALY] Juju has the openstack model but MAAS shows machines Ready."
-    echo "          Likely: model is in 'destroying' state. Run runbook 01 Phase D remediation."
-    ;;
-  "absent:all_deployed_or_other")
-    echo "[ANOMALY] No Juju model, but machines are not Ready."
-    echo "          Likely: someone else owns the machines, OR commissioning is mid-flight."
-    echo "          Investigate before continuing."
-    ;;
-  "present:all_deployed_or_other")
-    echo "[DIRTY] Cloud is up. Execute runbook 01-destroy-model.md in full."
-    ;;
-  *":partial")
-    echo "[PARTIAL] Mixed MAAS state. Use runbook 01 Phase C (release loop) and Phase D verification."
-    ;;
-  *)
-    echo "[UNKNOWN] state=$JUJU_STATE:$MAAS_STATE — investigate before continuing"
-    ;;
-esac
-```
-
----
-
-
-## 3. Execute (or verify) the teardown procedure
-
-### 3.1 If state is CLEAN (current Caracal cycle)
-
-No execution needed. Skip to §4 acceptance.
-
-Optional sanity check (read-only):
-
-```bash
-echo "=== Confirm no 'openstack' model ==="
-juju models | grep "^openstack" && echo "[FAIL] model still present" || echo "[OK] no openstack model"
-
-echo ""
-echo "=== Confirm all 5 VMs Ready ==="
-maas "$MAAS_PROFILE" machines read 2>/dev/null \
-  | python3 -c "
-import json, sys
-machines = json.load(sys.stdin)
-targets = ['openstack0', 'openstack1', 'openstack2', 'openstack3', 'capi-mgmt']
-for m in machines:
-    h = m.get('hostname', '')
-    if h in targets:
-        print(f'{h}: {m.get(\"status_name\", \"?\")}, owner={m.get(\"owner\") or \"(none)\"}')
-"
-
-echo ""
-echo "=== Confirm OSD qcow2 files still exist (should be ~200 KiB each after wipe) ==="
-ls -la /var/lib/libvirt/images/openstack[0-3]-1.qcow2 2>/dev/null \
-  || echo "[NOTE] OSD qcow2 files not visible to current user; check from jumphost user if needed"
-```
-
-### 3.2 If state is DIRTY (future rebuild cycles)
-
-Execute `runbooks/01-destroy-model.md` in full, in order:
-
-1. **Phase A** — Pre-destroy capture (~30 sec). Captures `juju export-bundle`, `juju status`, `juju models`, `juju controllers` to `$HOME/backups/pre-caracal-destroy-<TS>/`. Updates `$HOME/.last-pre-caracal-destroy-backup` pointer.
-
-2. **Phase B** — Force-destroy the Juju model. Returns in ~1-2 min; reaping continues for ~5-10 min. Command:
-
-   ```
-   juju destroy-model openstack --force --no-wait --destroy-storage --no-prompt
-   ```
-
-3. **Phase C** — Release MAAS machines (parallel with Phase B; ~5 min). Either Path 1 (MAAS UI) or Path 2 (CLI loop). The CLI loop is filtered by owner — only releases machines you own.
-
-4. **Phase D** — Verification (~1 min). Confirms model is gone and all 5 VMs are Ready.
-
-**Critical pre-flight before Phase B:** verify you are about to destroy the right model. The destruction is not undoable short of restoring KVM snapshots:
-
-```bash
-juju models
-juju status --model openstack 2>/dev/null | head -20
-```
-
-Confirm the model name and the unit counts match what you expect to lose.
-
-### 3.3 If state is PARTIAL or ANOMALY
-
-Use runbook 01 Phase D's remediation block (lines 161-173 of `runbooks/01-destroy-model.md`):
-
-```bash
-juju machines -m openstack --format=yaml 2>/dev/null
-
-# For each lingering machine ID:
-juju remove-machine -m openstack --force <id>
-
-# Then re-attempt model removal:
-juju destroy-model openstack --force --no-wait --no-prompt
-```
-
-If the model is still listed after the above, escalate — controller-side state may be corrupted (rare).
-
----
-
-
-## 4. Acceptance criteria — go/no-go for v1-do-doc-04-deploy
-
-Before proceeding to deploy:
-
-- [ ] `juju models` does NOT list `openstack`
-- [ ] All 5 VMs (`openstack0`, `openstack1`, `openstack2`, `openstack3`, `capi-mgmt`) report MAAS status `Ready`, owner `(none)`
-- [ ] If state was DIRTY: `$HOME/.last-pre-caracal-destroy-backup` exists and points to a populated backup directory
-- [ ] If OSD wipe is needed (rebuild after first Caracal cycle): verify the OSD qcow2 wipe procedure was executed per the 2026-05-22 protocol
-
-If all checked, proceed to `v1-do-doc-04-deploy.md`.
-
----
-
-
-## 5. Roosevelt deltas (forward-look)
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Teardown target | 5 KVM VMs on jumphost | Bare-metal MAAS-managed servers |
-| Phase C release | libvirt VM owned by current MAAS user | Bare-metal owned by current MAAS user |
-| OSD qcow2 wipe | Yes (libvirt secondary disks survive MAAS release) | No (real disks; MAAS commissioning wipes them) |
-| Backup directory | `$HOME/backups/pre-caracal-destroy-*/` | Same convention, on bastion |
-
----
-
-
-## 6. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-27 | Document created. Thin pointer to `runbooks/01-destroy-model.md` with state-detection routing for clean/dirty/partial cases. | Batch B drafting |
diff --git a/runbooks/v1-do-doc-04-deploy.md b/runbooks/v1-do-doc-04-deploy.md
deleted file mode 100644
index 1b62e89..0000000
--- a/runbooks/v1-do-doc-04-deploy.md
+++ /dev/null
@@ -1,513 +0,0 @@
-# v1 Do-Document 04 — Caracal Bundle Deploy
-
-**Status:** Second execution document of Batch B. First cloud-mutating step in the v1 deploy sequence. Triggers MAAS provisioning of 4 hosts, LXD container creation, charm installation, and the initial relation cascade.
-
-**Position in sequence:** Runs after `v1-do-doc-03-destroy.md` (state confirmed Clean: 5 VMs Ready, no openstack model). Runs before `v1-do-doc-05-vault-init.md` (manual Vault init).
-
-**Cross-references:**
-
-- D-001 (Path 2A: Juju-bundle paradigm)
-- D-002 (channel matrix)
-- D-006 (Vault HA backend; etcd + easyrsa bootstrap)
-- D-007 (Magnum Layer A only at this step; Layer B is Batch C)
-- D-017 (CAPI bootstrap cluster lifecycle; not touched in this doc — that's Batch C)
-- `bundle.yaml` (canonical deploy artifact)
-- `overlays/octavia-pki.yaml` (gitignored; from v1-do-doc-02)
-
----
-
-
-## 1. Purpose & scope
-
-Execute the Caracal-bundle deploy and watch the model settle to a known-incomplete state: every charm reaches `active/idle` EXCEPT those waiting on `vault:certificates`, which sit in `blocked: 'certs not present yet'` until Vault is initialized in v1-do-doc-05.
-
-**What this document does:**
-
-- `juju add-model openstack`
-- `juju deploy ./bundle.yaml --overlay overlays/octavia-pki.yaml`
-- Watch the model settle (~60-90 minutes typical for this testcloud size)
-- Verify Octavia received the PKI material on disk (the explicit on-disk verification — §8)
-- Verify expected pre-Vault state: Vault charm blocked awaiting init; cert-dependent charms blocked awaiting Vault; non-cert-dependent charms active/idle
-
-**What this document does NOT do:**
-
-- Initialize Vault (next doc, v1-do-doc-05)
-- Run any post-Vault functional verification of Octavia LB (deferred to v1-do-doc-02 §14, executed after v1-do-doc-05 completes)
-- Magnum Keystone domain setup (Batch C, v1-do-doc-06)
-- CAPI bootstrap or Magnum driver graft (Batch C)
-- Tenant resources (Batch D)
-
-**Out of scope:**
-
-- KVM snapshot capture (per D-017, snapshots are not refreshed each cycle; pre-existing snapshots remain as last-resort safety net)
-- NetBox VIP IPAddress writes (pinned post-deploy for external NetBox-engineer review; D-010 relaxed for v1)
-
----
-
-
-## 2. Decisions captured
-
-| Decision | Choice | Notes |
-|---|---|---|
-| Model name | `openstack` | Matches `runbooks/01-destroy-model.md` Phase B target |
-| Deploy command | `juju deploy ./bundle.yaml --overlay overlays/octavia-pki.yaml` | One overlay; `vr0-dc0-testcloud.yaml` was a placeholder and is empty per pre-deploy review |
-| `--trust` flag | Not used | Standard OpenStack charms on MAAS do not require bundle-level trust. If a specific charm needs it post-deploy (none expected for Caracal v1), apply targeted `juju trust <app>` then. |
-| Settle wait | Manual watch via `juju status --watch 30s`; ~60-90 min typical | Charms cycle blocked → maintenance → active/idle |
-| Expected pre-Vault end state | Vault blocked; cert-relation consumers blocked; everything else active/idle | See §7.3 for the explicit blocked-charm list |
-| PKI on-disk verification | Files-on-disk + fingerprint compare after Octavia config-changed hook completes | §8 — the explicit operator-asked confirmation |
-
----
-
-
-## 3. Prerequisites
-
-| Prereq | Verification |
-|---|---|
-| `v1-do-doc-01-prep.md` ✓ (state-check passed) | Manual confirmation |
-| `v1-do-doc-02-pki.md` ✓ (overlay generated) | `test -f "$REPO/overlays/octavia-pki.yaml"` |
-| `v1-do-doc-03-destroy.md` ✓ (no `openstack` model, 5 VMs Ready) | Run §2 state-detection block from doc-03 again — should still return CLEAN |
-| Pre-deploy fixes all committed and pulled locally | Verified in v1-do-doc-01 §4.3 |
-
-**Shell context — paste once:**
-
-```bash
-export REPO="$HOME/openstack-caracal-ipv4"
-cd "$REPO"
-echo "REPO=$REPO"
-test -f overlays/octavia-pki.yaml && echo "[OK] PKI overlay present" || echo "[FAIL] missing overlay"
-git status --short
-# Expect: clean working tree
-```
-
----
-
-
-## 4. Pre-flight checks (all must pass)
-
-These are READ-ONLY safety checks. Stop if any FAIL.
-
-```bash
-cd "$REPO"
-
-echo "=== 4.1 bundle.yaml YAML parses ==="
-python3 -c "import yaml; yaml.safe_load(open('bundle.yaml'))" \
-  && echo "[OK] bundle.yaml parses" || echo "[FAIL] bundle.yaml YAML error"
-echo ""
-
-echo "=== 4.2 octavia-pki overlay YAML parses ==="
-python3 -c "
-import yaml
-d = yaml.safe_load(open('overlays/octavia-pki.yaml'))
-o = d['applications']['octavia']['options']
-keys = sorted(o.keys())
-expected = ['lb-mgmt-controller-cacert','lb-mgmt-controller-cert','lb-mgmt-issuing-ca-key-passphrase','lb-mgmt-issuing-ca-private-key','lb-mgmt-issuing-cacert']
-print('Keys in overlay:', keys)
-print('All 5 keys present:', keys == expected)
-print('All values non-empty:', all(v for v in o.values()))
-"
-echo ""
-
-echo "=== 4.3 ceph-osd has no storage block (pre-deploy fix #1) ==="
-grep -A 12 "^  ceph-osd:" bundle.yaml | grep "^    storage:" \
-  && echo "[FAIL] storage block present — pre-deploy fix not applied" \
-  || echo "[OK] no storage block under ceph-osd"
-echo ""
-
-echo "=== 4.4 expected-osd-count is 4 (matches one OSD per host) ==="
-grep -A 8 "^  ceph-mon:" bundle.yaml | grep "expected-osd-count: 4" \
-  && echo "[OK] expected-osd-count: 4" \
-  || echo "[FAIL] expected-osd-count is not 4"
-echo ""
-
-echo "=== 4.5 11 VIPs declared (Designate deferred to v2 per D-019) ==="
-VIP_COUNT=$(grep -cE "^[[:space:]]+vip: 10\.12\.4\." bundle.yaml)
-echo "VIP count: $VIP_COUNT (expect 11)"
-echo ""
-
-echo "=== 4.6 No model named 'openstack' exists ==="
-juju models | grep "^openstack" \
-  && echo "[FAIL] model 'openstack' already exists — re-run doc-03 destroy" \
-  || echo "[OK] no openstack model"
-echo ""
-
-echo "=== 4.7 All 5 cloud-target VMs MAAS-Ready ==="
-export MAAS_PROFILE=$(maas list 2>/dev/null | awk 'NR==1 {print $1}')
-maas "$MAAS_PROFILE" machines read 2>/dev/null \
-  | python3 -c "
-import json, sys
-machines = json.load(sys.stdin)
-targets = ['openstack0', 'openstack1', 'openstack2', 'openstack3', 'capi-mgmt']
-ready = sum(1 for m in machines if m.get('hostname') in targets and m.get('status_name') == 'Ready' and not m.get('owner'))
-print(f'Ready + unowned: {ready} / 5')
-print('[OK]' if ready == 5 else '[FAIL]')
-"
-echo ""
-
-echo "=== 4.8 Juju controller available ==="
-juju controllers
-juju show-controller 2>/dev/null | head -10
-echo ""
-
-echo "=== 4.9 Disk space on /var/lib/libvirt/images ==="
-df -h /var/lib/libvirt/images 2>/dev/null
-echo "  Need ≥ 4 × 8 GiB for openstack0-3 root + LXD container space; ≥ 4 × 512 GiB OSD qcow2 already allocated"
-```
-
-If any check above does not show `[OK]` (or the expected value), stop and investigate before continuing.
-
----
-
-
-## 5. Add the Juju model
-
-```bash
-juju add-model openstack
-juju model-config -m openstack | head -20
-```
-
-Expected: model `openstack` created on the current controller. `juju models` should now show it.
-
-> **Optional model-config tweaks (only if you have a reason):**
->
-> - `default-base: ubuntu@22.04/stable` — already in the bundle's top-level config; model-level override not needed.
-> - `transmit-vendor-metrics=false` — privacy posture; testcloud doesn't need to phone home. Optional.
->
-> For this v1 cycle, leave model-config at defaults. Tweaks are easier to debug when only one is changed at a time.
-
----
-
-
-## 6. Deploy command
-
-```bash
-cd "$REPO"
-
-# Confirm working dir and overlay
-pwd
-ls -la bundle.yaml overlays/octavia-pki.yaml
-echo ""
-
-# Deploy — this returns in a few seconds; actual provisioning runs in background
-juju deploy ./bundle.yaml --overlay overlays/octavia-pki.yaml -m openstack
-```
-
-Expected output: a long list of deploy actions ("Deploying ...", "Resolving ...", "Located bundle ..."). Then control returns to the prompt.
-
-If the deploy command itself errors (YAML syntax, charm-not-found, etc.), stop here. The bundle has not started provisioning yet — fix and rerun.
-
----
-
-
-## 7. Settle watch
-
-Provisioning takes **60-90 minutes typical** for this testcloud size on this jumphost. The bundle requests MAAS-deploy of 4 hosts + creation of ~25 LXD containers + 30+ charm installs + relation establishment.
-
-### 7.1 Recommended watch command
-
-In a dedicated terminal (don't share with the destroy / deploy terminal — interaction can interrupt screen redraws):
-
-```bash
-juju status --color --watch 30s -m openstack
-```
-
-Refreshes every 30 seconds. Ctrl+C to exit.
-
-### 7.2 Expected progression
-
-Rough timeline (your mileage may vary):
-
-| Elapsed | What to expect |
-|---|---|
-| 0-3 min | MAAS deployment (not commissioning — the hosts are already Ready) starts on openstack0-3: power-on, PXE boot, disk partitioning |
-| 3-10 min | Ubuntu install on openstack0-3 via MAAS preseed |
-| 10-15 min | Hosts in Juju show `pending` → `started`. LXD service comes up on each |
-| 15-30 min | LXD containers being created; subordinate charms (mysql-router, hacluster) attaching |
-| 30-50 min | Charm config-changed hooks running; relations forming; databases bootstrapping |
-| 50-90 min | Most charms reach `blocked` (waiting on Vault) or `active/idle` (no Vault dep) |
-| 90+ min | Settle stabilizes at the pre-Vault end state |
-
-If progress visibly stalls for >15 minutes in the middle of the timeline, see §7.4.
-
-### 7.3 Pre-Vault expected end state
-
-When the model has settled (stops progressing for >5 minutes), this is what `juju status` should show:
-
-**`blocked` (waiting on Vault):**
-
-The following charms have `:certificates` relations to `vault:certificates` and CANNOT reach active/idle until Vault is initialized in v1-do-doc-05:
-
-- `vault` itself — status: `Vault needs to be initialized` (this is the trigger to run doc-05)
-- `mysql-innodb-cluster` (needs vault cert for inter-instance TLS)
-- `keystone` (Keystone API TLS)
-- `glance` (Glance API TLS)
-- `nova-cloud-controller`, `placement`, `neutron-api`, `neutron-api-plugin-ovn`, `ovn-central`, `ovn-chassis`, `ovn-chassis-octavia`
-- `cinder`, `octavia`, `octavia-dashboard`
-- `barbican`, `barbican-vault`
-- `magnum`, `magnum-dashboard`
-- `glance-simplestreams-sync`, `openstack-dashboard`, `ceph-radosgw`
-- `octavia-diskimage-retrofit` (subordinate of glance-simplestreams-sync)
-- All 11 `*-hacluster` subordinates indirectly (because their principal is blocked; Designate's hacluster removed per D-019)
-
-**`active/idle` (no Vault dependency):**
-
-- `rabbitmq-server`
-- `etcd` (uses easyrsa for its OWN TLS, not Vault)
-- `easyrsa`
-- `nova-compute` (no `:certificates` relation directly; it gets ceph keys via ceph-mon)
-- `ceph-mon`, `ceph-osd` (Ceph cluster bootstraps independently)
-
-**Note on the chicken-and-egg:**
-
-Per D-006 (Vault HA backend), etcd's TLS is bootstrapped by easyrsa via the `easyrsa:client ↔ etcd:certificates` relation. This is what lets etcd come up active/idle BEFORE Vault is initialized. Then Vault uses etcd as its HA backend. Watch that `easyrsa/0` and `etcd/{0,1,2}` reach active/idle within the first 30 minutes; if etcd stays blocked beyond that, easyrsa-related certs likely didn't flow.
-
-### 7.4 Stalls and remediation
-
-If progress stalls for >15 min in the middle of the timeline:
-
-```bash
-# Find which units are blocked or in error state
-juju status -m openstack | grep -E "(blocked|error|maintenance)"
-
-# For any unit in error state, get its log
-juju show-status-log <unit-name> -m openstack
-# E.g.: juju show-status-log keystone/0
-
-# For deeper inspection (log file name uses '-' in place of '/': keystone/0 -> unit-keystone-0.log)
-juju ssh <unit-name> -m openstack -- sudo tail -200 /var/log/juju/unit-<app>-<N>.log
-```
-
-Common stalls:
-
-1. **MAAS commissioning failure on a host** → check MAAS UI; may need to re-commission the host
-2. **LXD container creation failure** → check `lxc list` on the host; container quotas, image availability
-3. **Charm hook error** → check unit log; often a transient cloud-init issue; `juju resolved <unit>` may help
-4. **Relation never forms** → both ends must declare correct endpoint names; cross-check bundle
-
-Per D-018, do not pursue graceful recovery from major errors at this stage — full teardown via v1-do-doc-03 and redeploy is the canonical "reset" path.
-
----
-
-
-## 8. Post-deploy PKI verification — explicit on-disk confirmation
-
-**This section addresses the operator-asked confirmation that the PKI overlay made it onto the Octavia unit's filesystem after the charm's config-changed hook completes.**
-
-Run this section **after** `octavia/0` has progressed past `pending`/`maintenance` and reached at least a `blocked` state (i.e., the charm has run its install + config-changed hooks but is waiting on Vault for the API TLS cert). The `lb-mgmt-*` options are consumed by the config-changed hook regardless of Vault status — so on-disk material should be present even with octavia/0 in `blocked`.
-
-### 8.1 Confirm the unit is past config-changed
-
-```bash
-echo "=== Octavia unit status ==="
-juju status octavia -m openstack
-# Expect: octavia/0 in 'blocked' (cert relation pending; install + config-changed hooks have run).
-# If status is still 'maintenance', 'pending' or 'allocating', wait and re-run this section.
-```
-
-### 8.2 Inspect on-disk PKI directory
-
-```bash
-echo "=== /etc/octavia/certs/ contents ==="
-juju ssh octavia/0 -m openstack -- sudo ls -la /etc/octavia/certs/
-```
-
-Expected: 4-5 PEM files. The exact filenames depend on the charm revision; commonly:
-
-- `server_ca.cert.pem` — Issuing CA cert (consumed from `lb-mgmt-issuing-cacert`)
-- `server_ca.key.pem` — Issuing CA encrypted private key (consumed from `lb-mgmt-issuing-ca-private-key`)
-- `client_ca.cert.pem` — Controller CA cert (consumed from `lb-mgmt-controller-cacert`)
-- `client.cert-and-key.pem` — Controller cert + key bundle (consumed from `lb-mgmt-controller-cert`)
-
-If the directory is empty or missing, the config-changed hook hasn't run yet or failed. Re-check unit status; see §7.4 remediation.
-
-> **[unverified, flagging]:** the exact filenames above are typical for recent charm-octavia revisions but may vary. The verification below uses fingerprint comparison (content-based), which is filename-agnostic — adapt the filenames in the loop if `ls` shows different ones.
-
-### 8.3 Stage cert content from unit for comparison
-
-```bash
-mkdir -p "$HOME/pki-verify"
-chmod 700 "$HOME/pki-verify"
-cd "$HOME/pki-verify"
-
-# Pull whatever PEM files are present in /etc/octavia/certs/
-juju ssh octavia/0 -m openstack -- sudo ls /etc/octavia/certs/ 2>/dev/null | \
-  while read -r f; do
-    case "$f" in
-      *.pem|*.crt)
-        echo "Pulling $f ..."
-        juju ssh octavia/0 -m openstack -- sudo cat "/etc/octavia/certs/$f" \
-          < /dev/null > "$HOME/pki-verify/unit-$f"   # </dev/null: keep ssh from consuming the loop's stdin
-        ;;
-    esac
-  done
-
-ls -la "$HOME/pki-verify/"
-```
-
-### 8.4 Fingerprint comparison — CA certs
-
-```bash
-echo "=== Issuing CA fingerprint comparison ==="
-# Find the issuing CA on the unit (charm naming: typically server_ca.cert.pem)
-UNIT_FILE=$(ls "$HOME/pki-verify/" | grep -E "^unit-server.*ca.*\.cert\.pem$" | head -1)
-if [ -z "$UNIT_FILE" ]; then
-  echo "[WARN] no unit-server*ca*.cert.pem file found — list and adapt:"
-  ls "$HOME/pki-verify/"
-else
-  UNIT_FP=$(openssl x509 -in "$HOME/pki-verify/$UNIT_FILE" -noout -fingerprint -sha256 2>/dev/null | cut -d= -f2)
-  SRC_FP=$(openssl x509 -in "$HOME/octavia-pki/issuing-ca/issuing-ca.cert.pem" -noout -fingerprint -sha256 | cut -d= -f2)
-  echo "Unit ($UNIT_FILE):    $UNIT_FP"
-  echo "Jumphost (issuing-ca): $SRC_FP"
-  if [ "$UNIT_FP" = "$SRC_FP" ]; then
-    echo "[OK] Issuing CA cert on unit matches jumphost source"
-  else
-    echo "[FAIL] fingerprints DIFFER — investigate before continuing"
-  fi
-fi
-echo ""
-
-echo "=== Controller CA fingerprint comparison ==="
-UNIT_FILE=$(ls "$HOME/pki-verify/" | grep -E "^unit-client.*ca.*\.cert\.pem$" | head -1)
-if [ -z "$UNIT_FILE" ]; then
-  echo "[WARN] no unit-client*ca*.cert.pem file found — list and adapt:"
-  ls "$HOME/pki-verify/"
-else
-  UNIT_FP=$(openssl x509 -in "$HOME/pki-verify/$UNIT_FILE" -noout -fingerprint -sha256 2>/dev/null | cut -d= -f2)
-  SRC_FP=$(openssl x509 -in "$HOME/octavia-pki/controller-ca/controller-ca.cert.pem" -noout -fingerprint -sha256 | cut -d= -f2)
-  echo "Unit ($UNIT_FILE):       $UNIT_FP"
-  echo "Jumphost (controller-ca): $SRC_FP"
-  if [ "$UNIT_FP" = "$SRC_FP" ]; then
-    echo "[OK] Controller CA cert on unit matches jumphost source"
-  else
-    echo "[FAIL] fingerprints DIFFER — investigate before continuing"
-  fi
-fi
-```
-
-### 8.5 Controller cert bundle verification
-
-The `lb-mgmt-controller-cert` value contains BOTH the controller cert AND its key, concatenated. On the unit it lands as a single PEM bundle. Confirm:
-
-1. The cert in the bundle matches the cert we generated.
-2. The key in the bundle matches the cert (private key proves possession).
-
-```bash
-echo "=== Controller cert+key bundle verification ==="
-UNIT_FILE=$(ls "$HOME/pki-verify/" | grep -E "^unit-client.*\.pem$" | grep -v "ca" | head -1)
-# Common name: client.cert-and-key.pem
-if [ -z "$UNIT_FILE" ]; then
-  echo "[WARN] no controller cert bundle found on unit — list and adapt:"
-  ls "$HOME/pki-verify/"
-else
-  echo "Found bundle: $UNIT_FILE"
-
-  # Compare cert fingerprint
-  UNIT_FP=$(openssl x509 -in "$HOME/pki-verify/$UNIT_FILE" -noout -fingerprint -sha256 2>/dev/null | cut -d= -f2)
-  SRC_FP=$(openssl x509 -in "$HOME/octavia-pki/controller/controller.cert.pem" -noout -fingerprint -sha256 | cut -d= -f2)
-  echo "Unit cert FP:    $UNIT_FP"
-  echo "Source cert FP:  $SRC_FP"
-  [ "$UNIT_FP" = "$SRC_FP" ] && echo "[OK] cert match" || echo "[FAIL] cert mismatch"
-
-  # Confirm cert+key in bundle match each other (proof of possession)
-  CERT_PUB=$(openssl x509 -in "$HOME/pki-verify/$UNIT_FILE" -noout -pubkey 2>/dev/null | openssl md5)
-  KEY_PUB=$(openssl pkey -in "$HOME/pki-verify/$UNIT_FILE" -pubout 2>/dev/null | openssl md5)
-  echo "Cert pubkey md5: $CERT_PUB"
-  echo "Key pubkey md5:  $KEY_PUB"
-  [ "$CERT_PUB" = "$KEY_PUB" ] && echo "[OK] cert and key in bundle are paired" || echo "[FAIL] cert and key DO NOT match"
-fi
-```
-
-### 8.6 Issuing CA encrypted key + passphrase verification
-
-The Issuing CA's encrypted key sits on the unit. Confirm the passphrase from `octavia.conf` can decrypt it. This is the test that proves the runtime amphora-signing path will work once Octavia comes up post-Vault.
-
-```bash
-echo "=== Passphrase round-trip test ==="
-
-# Pull the passphrase from octavia.conf
-UNIT_PASS=$(juju ssh octavia/0 -m openstack -- \
-  sudo grep "^ca_private_key_passphrase" /etc/octavia/octavia.conf 2>/dev/null | head -1 | cut -d= -f2- | sed 's/^[[:space:]]*//' | sed 's/[[:space:]]*$//')
-
-# Pull the encrypted key
-UNIT_KEY=$(ls "$HOME/pki-verify/" | grep -E "^unit-server.*key.*\.pem$" | head -1)
-
-if [ -z "$UNIT_PASS" ]; then
-  echo "[WARN] passphrase line not found in octavia.conf — may not be present until vault-init"
-elif [ -z "$UNIT_KEY" ]; then
-  echo "[WARN] no unit-server*key*.pem found"
-else
-  # Try to decrypt the key using the passphrase from the unit
-  if openssl pkey -in "$HOME/pki-verify/$UNIT_KEY" -passin "pass:$UNIT_PASS" -noout 2>/dev/null; then
-    echo "[OK] passphrase in octavia.conf decrypts the on-disk Issuing CA key"
-  else
-    echo "[FAIL] passphrase did NOT decrypt the key — overlay value mismatch"
-  fi
-
-  # Also confirm passphrase matches what we generated on jumphost
-  SRC_PASS=$(cat "$HOME/octavia-pki/issuing-ca/passphrase.txt")
-  if [ "$UNIT_PASS" = "$SRC_PASS" ]; then
-    echo "[OK] passphrase on unit matches jumphost source"
-  else
-    echo "[FAIL] passphrase on unit does NOT match jumphost source"
-  fi
-fi
-
-# Clear the passphrase from shell
-unset UNIT_PASS SRC_PASS
-```
-
-> **Note:** the `ca_private_key_passphrase` line in `octavia.conf` may not appear until the cert relation completes. If §8.6 reports `[WARN] passphrase line not found`, that is **expected at pre-Vault state** — the charm may defer writing the full `[certificates]` section until the cert relation has flowed. Re-run §8.6 after v1-do-doc-05 completes.
-
-### 8.7 Cleanup
-
-```bash
-# Optional: shred the temp copies of the cert material (§8.3 pulls both *.pem and *.crt)
-shred -uvz "$HOME/pki-verify/"*.pem "$HOME/pki-verify/"*.crt 2>/dev/null
-rmdir "$HOME/pki-verify" 2>/dev/null
-```
-
-The unit retains the originals; the jumphost-side originals are at `$HOME/octavia-pki/` (left in place per v1-do-doc-02 §13).
-
----
-
-
-## 9. Acceptance criteria — go/no-go for v1-do-doc-05-vault-init
-
-Before proceeding to Vault init:
-
-- [ ] §4 all pre-flight checks `[OK]`
-- [ ] §5 model `openstack` created
-- [ ] §6 deploy command completed without bundle-level errors
-- [ ] §7 model has settled (no progress for >5 min); pre-Vault end state matches §7.3
-- [ ] §7.3 critical infra `active/idle`: `rabbitmq-server`, `etcd/{0,1,2}`, `easyrsa`, `ceph-mon/{0,1,2}`, `ceph-osd/{0,1,2,3}`, `nova-compute/{0,1,2,3}`
-- [ ] §7.3 Vault is `blocked: Vault needs to be initialized` (the trigger for doc-05)
-- [ ] §8.2 PKI files present in `/etc/octavia/certs/` on octavia/0
-- [ ] §8.4 Issuing CA fingerprint match `[OK]`
-- [ ] §8.4 Controller CA fingerprint match `[OK]`
-- [ ] §8.5 Controller cert bundle cert+key paired `[OK]`
-- [ ] §8.6 passphrase round-trip `[OK]` (or `[WARN] not yet in conf` — acceptable if pre-Vault)
-
-If all checked, proceed to `v1-do-doc-05-vault-init.md`.
-
----
-
-
-## 10. Roosevelt deltas (forward-look)
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Bundle | `bundle.yaml` (single file) | Multi-environment overlay structure |
-| Hosts | 4 KVM VMs | Bare-metal MAAS-managed servers |
-| LXD container layout | Dense (10+ on machine 8) | More spread; possibly real units instead of LXD for some apps |
-| Overlay set | `overlays/octavia-pki.yaml` only | Site overlay (machine assignments, NIC MACs) + Vault overlay + PKI overlay |
-| Settle time | 60-90 minutes | Likely 2-4 hours (more hosts, real provisioning) |
-| Octavia PKI source | Operator-generated, overlay-distributed | Vault PKI engine |
-| Octavia PKI verification | This §8 procedure | Vault-side audit trail; no manual comparison needed |
-
----
-
-
-## 11. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-27 | Document created. Replaces `runbooks/deprecated/02-deploy.md` (placeholder). Adds explicit §8 on-disk PKI verification per operator request. | Batch B drafting |
diff --git a/runbooks/v1-do-doc-05-vault-init.md b/runbooks/v1-do-doc-05-vault-init.md
deleted file mode 100644
index e2e7d7a..0000000
--- a/runbooks/v1-do-doc-05-vault-init.md
+++ /dev/null
@@ -1,596 +0,0 @@
-# v1 Do-Document 05 — Vault Initialization & Cert-Relation Cascade
-
-**Status:** Third execution document of Batch B. Manual three-step Vault bring-up plus regeneration of admin-openrc. Last document in Batch B.
-
-**Position in sequence:** Runs after `v1-do-doc-04-deploy.md` (model settled at pre-Vault end state). Runs before `v1-do-doc-06-magnum-domain.md` (Batch C).
-
-**Cross-references:**
-
-- D-006 (Vault HA backend — etcd + easyrsa)
-- D-009 (Hacluster modeling at testcloud scale)
-- D-011 §6 (validation: Vault unseal + auto-unseal-after-reboot pattern)
-- `bundle.yaml` Vault block (channel 1.8/stable, vip 10.12.4.236, vault-mysql-router subordinate)
-- OpenStack charm-deployment-guide Appendix C — Vault (initialise / unseal / authorise)
-
----
-
-
-## 1. Purpose & scope
-
-Initialize Vault, unseal it, authorize the charm, then watch the `vault:certificates` relation cascade flow certs to every API charm. The cascade unblocks roughly 20 charms that were `blocked` after doc-04.
-
-**The three manual Vault steps:**
-
-1. **Initialise** — generate the master encryption key + unseal keys + root token. ONE-SHOT per Vault lifetime. The unseal keys are the disaster-recovery material.
-2. **Unseal** — provide 3-of-5 unseal keys to decrypt Vault's master key. Required after every Vault restart (including post-bundle-deploy and post-host-reboot).
-3. **Authorize** — give the Vault charm a Vault token so it can manage policies, app roles, and secrets storage for OpenStack consumers.
-
-**What this document does:**
-
-- Discovers Vault's address (HTTP at this point — pre-TLS)
-- Runs `vault operator init` and captures the 5 unseal keys + 1 root token
-- Runs `vault operator unseal` 3 times (with 3 different keys)
-- Runs `juju run vault/leader authorize-charm token=...` to graft the charm
-- Watches the cert-relation cascade settle
-- Regenerates `$HOME/admin-openrc` against the new Keystone
-- Smoke-tests the post-Vault state: every charm `active/idle`
-
-**What this document does NOT do:**
-
-- Set up auto-unseal (Vault's transit-engine-based auto-unseal pattern) — out of scope for v1; manual unseal after host reboot is acceptable. Roosevelt may revisit.
-- Set up Vault PKI engine for tenant-side use — out of scope for v1.
-- Provision tenant resources or DNS zones (Batch D)
-- Magnum domain or CAPI work (Batch C)
-
-**Out of scope security note:** the unseal keys captured in §3 are the disaster-recovery material. Per the Caracal_Rebuild handoff, the prior cycle's keys are accepted lost. The keys generated HERE need a secure off-host home — operator decision (admin workstation encrypted vault, password manager attachment, dedicated secrets store). For Roosevelt this becomes a real key-management procedure.
-
----
-
-
-## 2. Decisions captured
-
-| Decision | Choice | Notes |
-|---|---|---|
-| Key shares / threshold | 5 keys, threshold 3 | Standard Shamir's-secret-sharing posture; allows quorum-of-3 unseal |
-| Vault address scheme | HTTP via unit IP for init/unseal/authorize; HTTPS via VIP thereafter | Vault has no TLS until authorize-charm flips it on |
-| Authorize-charm pattern | Direct `token=<root-token>` parameter (channel 1.8/stable convention) | Newer revisions may require `token-secret-id=` via Juju secret; verify with `juju show-action vault authorize-charm` first |
-| Admin-openrc location | `$HOME/admin-openrc` | Same path as prior cloud; overwritten |
-| Admin domain/project | Charmed-Keystone defaults: user=admin, user-domain=admin_domain, project=admin_domain | `[unverified, flagging]` for the project — older charm versions used `admin` for project; verify by `openstack token issue` |
-| Unseal key storage | Operator decision — secure off-host | This document warns; doesn't dictate the where |
-
----
-
-
-## 3. Prerequisites
-
-| Prereq | Verification |
-|---|---|
-| `v1-do-doc-04-deploy.md` ✓ (model settled, pre-Vault end state confirmed) | Manual; re-check via §4.1 below |
-| Octavia PKI on-disk verification `[OK]` (doc-04 §8) | Manual |
-| `vault` CLI installed on jumphost | `command -v vault && vault --version` (any 1.7+ works for client) |
-| Juju controller still reachable | `juju controllers` |
-
-**Shell context — paste once:**
-
-```bash
-export REPO="$HOME/openstack-caracal-ipv4"
-cd "$REPO"
-echo "REPO=$REPO"
-```
-
-**Install vault CLI if missing** (using the Hashicorp APT repo; one-time per jumphost):
-
-```bash
-if ! command -v vault >/dev/null 2>&1; then
-  echo "vault CLI not present. Install via Hashicorp APT repo:"
-  echo "  wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
-  echo "  echo \"deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com \$(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/hashicorp.list"
-  echo "  sudo apt update && sudo apt install vault"
-  echo "  (After install, you may need 'sudo setcap cap_ipc_lock= /usr/bin/vault' on hosts without IPC_LOCK capability.)"
-else
-  vault --version
-fi
-```
-
----
-
-
-## 4. Pre-flight: confirm pre-Vault state
-
-```bash
-echo "=== 4.1 Vault status (expect: blocked, 'Vault needs to be initialized') ==="
-juju status vault -m openstack
-
-echo ""
-echo "=== 4.2 Other charms expected blocked on vault (sample) ==="
-juju status -m openstack keystone glance neutron-api octavia magnum 2>/dev/null | grep -E "(blocked|active|maintenance)"
-
-echo ""
-echo "=== 4.3 Critical infra expected active/idle ==="
-juju status -m openstack rabbitmq-server etcd easyrsa ceph-mon ceph-osd nova-compute 2>/dev/null | grep -E "(active|blocked|error)"
-```
-
-If `vault/0` is not in `blocked: Vault needs to be initialized`, stop. Either Vault hasn't reached config-changed yet (re-run §4 after a few minutes), or it's in a different blocked state that needs investigation.
-
-If `etcd` is NOT active/idle (e.g., still maintenance or blocked), Vault cannot use it as a backend. Investigate easyrsa→etcd cert flow before continuing.
-
----
-
-
-## 5. Discover Vault's address & set environment
-
-Vault has no TLS yet. We connect via HTTP on the unit's port 8200.
-
-```bash
-# Get the unit's IP (NOT the VIP — VIP is hacluster-managed and only active after Vault is up)
-VAULT_UNIT_IP=$(juju show-unit vault/0 -m openstack 2>/dev/null | grep "public-address:" | head -1 | awk '{print $2}')
-
-if [ -z "$VAULT_UNIT_IP" ]; then
-  echo "[FAIL] could not resolve vault/0 public-address. Check 'juju show-unit vault/0'"
-else
-  echo "vault/0 public-address: $VAULT_UNIT_IP"
-fi
-
-# Set VAULT_ADDR for the vault CLI
-export VAULT_ADDR="http://${VAULT_UNIT_IP}:8200"
-echo "VAULT_ADDR=$VAULT_ADDR"
-
-# Confirm reachable
-vault status 2>&1 | head -20
-# Expected output:
-#   Sealed: true
-#   Initialized: false
-#   ... (or similar — exit code 2 is expected when uninitialized)
-```
-
-> **Note on VAULT_ADDR scheme:** HTTP at this stage. After authorize-charm, Vault enables HTTPS using its own internal CA cert. From that point onward, `VAULT_ADDR=https://vault.omega.dc0.vr0.cloud.neumatrix.local:8200` (or `https://10.12.4.236:8200`) is the right address, but `vault` CLI will need the Vault CA root cert via `VAULT_CACERT` or `-tls-skip-verify`. For this document, we only use the HTTP address — once authorize-charm completes, the charm handles all subsequent Vault interactions internally.
-
----
-
-
-## 6. Initialise Vault (one-shot per Vault lifetime)
-
-**WARNING:** the output of this command contains the unseal keys and the root token. If lost, Vault is unrecoverable — there is no "forgot password" path. If exposed, an attacker with the unseal keys can decrypt everything Vault holds.
-
-Capture the output to a file in `$HOME` (filesystem-encrypted assumed; if not, work on a tmpfs):
-
-```bash
-mkdir -p "$HOME/vault-init"
-chmod 700 "$HOME/vault-init"
-
-# Init with 5 key shares, threshold 3
-vault operator init -key-shares=5 -key-threshold=3 \
-  > "$HOME/vault-init/init-output-$(date +%Y%m%d-%H%M%S).txt"
-
-# Permissions: tighten immediately
-chmod 600 "$HOME/vault-init/"*.txt
-
-# Display the output
-INIT_FILE=$(ls -t "$HOME/vault-init/"*.txt | head -1)
-echo "Init output captured to: $INIT_FILE"
-cat "$INIT_FILE"
-```
-
-Expected output format:
-
-```
-Unseal Key 1: <44-char base64>
-Unseal Key 2: <44-char base64>
-Unseal Key 3: <44-char base64>
-Unseal Key 4: <44-char base64>
-Unseal Key 5: <44-char base64>
-
-Initial Root Token: hvs.<long-token>
-
-Vault initialized with 5 key shares and a key threshold of 3. ...
-```
-
-**Immediately:**
-
-1. Copy this output (the entire file) to your secure off-host store (admin workstation encrypted drive, password manager, secrets vault).
-2. Verify you have it stored AND retrievable before proceeding to §7.
-
-The unseal keys are needed every time Vault restarts (including any reboot of the machine hosting the Vault unit). The root token is needed for `authorize-charm` in §8 and (potentially) for future Vault admin operations.
-
-**Re-running init is destructive.** There is no in-place re-init: `juju run vault/leader reissue-certificates` only reissues certificates (it does NOT re-init Vault). If something goes wrong here and you decide to wipe Vault, the procedure is to destroy + redeploy Vault (which discards encrypted state — anything stored in Vault is lost).
-
----
-
-
-## 7. Unseal Vault (3 of 5)
-
-Provide three different unseal keys. Vault decrypts its master key progressively; after the third key, `Sealed: false`.
-
-```bash
-# Find the newest init output file; copy the keys from it one at a time (do NOT print them all together)
-INIT_FILE=$(ls -t "$HOME/vault-init/"*.txt | head -1)
-
-# Unseal step 1 — paste Key 1 when prompted (the interactive prompt is safer than passing the key on the command line)
-vault operator unseal
-
-# Unseal step 2 — paste Key 2
-vault operator unseal
-
-# Unseal step 3 — paste Key 3
-vault operator unseal
-```
-
-After the third unseal, output should show:
-
-```
-Key                    Value
----                    -----
-Seal Type              shamir
-Initialized            true
-Sealed                 false   <-- this is the win condition
-Total Shares           5
-Threshold              3
-Version                1.8.x
-Cluster Name           vault-cluster-XXXX
-...
-```
-
-Verify:
-
-```bash
-vault status
-# Expect: Sealed: false, Initialized: true, HA Enabled: true (since etcd backend)
-```
-
----
-
-
-## 8. Authorize the charm
-
-Vault is now unsealed. The charm needs a token to create its own policies and app roles for managing OpenStack-consumer secrets and certs.
-
-### 8.1 Verify the action signature
-
-The `authorize-charm` action signature has shifted across vault charm revisions. Check first:
-
-```bash
-juju show-action authorize-charm --application vault -m openstack 2>/dev/null || \
-  juju actions vault -m openstack | grep authorize
-```
-
-Look for one of these patterns:
-
-- **Direct-token (older revisions, expected on `1.8/stable`):** parameter is `token=<root-token>`
-- **Juju-secret (newer revisions):** parameter is `token-secret-id=<juju-secret-id>`; the token must be in a Juju secret first
-
-For channel `1.8/stable` (what the bundle pins), the direct-token pattern is expected. If `juju show-action` indicates the secret-based pattern instead, use §8.3.
-
-### 8.2 Direct-token authorize (expected path)
-
-```bash
-# Extract the root token
-INIT_FILE=$(ls -t "$HOME/vault-init/"*.txt | head -1)
-ROOT_TOKEN=$(grep "^Initial Root Token:" "$INIT_FILE" | awk '{print $NF}')
-
-if [ -z "$ROOT_TOKEN" ]; then
-  echo "[FAIL] could not extract root token from $INIT_FILE"
-else
-  echo "Root token captured (length: ${#ROOT_TOKEN})"
-fi
-
-# Run the action
-juju run vault/leader authorize-charm token="$ROOT_TOKEN" -m openstack
-
-# Clear from shell
-unset ROOT_TOKEN
-```
-
-### 8.3 Juju-secret authorize (fallback if §8.1 shows the secret-based signature)
-
-```bash
-INIT_FILE=$(ls -t "$HOME/vault-init/"*.txt | head -1)
-ROOT_TOKEN=$(grep "^Initial Root Token:" "$INIT_FILE" | awk '{print $NF}')
-
-# Create a Juju secret containing the token
-SECRET_ID=$(juju add-secret vault-root-token token="$ROOT_TOKEN" -m openstack | grep -oE "secret:[a-z0-9]+")
-echo "Secret created: $SECRET_ID"
-unset ROOT_TOKEN
-
-# Grant the secret to the vault application
-juju grant-secret "$SECRET_ID" vault -m openstack
-
-# Run the action with the secret-id parameter (parameter name may vary; check §8.1)
-juju run vault/leader authorize-charm token-secret-id="$SECRET_ID" -m openstack
-
-# After authorize completes successfully, the secret can be removed
-juju remove-secret "$SECRET_ID" -m openstack
-```
-
-### 8.4 Verify authorize succeeded
-
-```bash
-echo "=== Vault status after authorize-charm ==="
-juju status vault -m openstack
-# Expect: vault/0 transitions out of 'blocked' to maintenance, then active/idle within 1-2 min
-```
-
-If vault/0 stays blocked after authorize-charm, check the unit log:
-
-```bash
-juju ssh vault/0 -m openstack -- sudo tail -100 /var/log/juju/unit-vault-0.log
-```
-
-Common failures: invalid token format; token already revoked; charm trying to write to a path the token can't access.
-
----
-
-
-## 9. Watch the cert-relation cascade
-
-After Vault is active/idle, the `vault:certificates` relation flows certs to ~20 charms. They progress from `blocked` → `maintenance` (writing certs, restarting services) → `active/idle`.
-
-Expected duration: **15-30 minutes** for the full cascade to settle.
-
-```bash
-juju status --color --watch 30s -m openstack
-```
-
-### 9.1 Expected progression
-
-| Tier | Charms unblocked | Approximate time after authorize-charm |
-|---|---|---|
-| Tier 1 (direct certs) | `mysql-innodb-cluster`, `ovn-central`, `keystone`, `glance`, `neutron-api`, `cinder` | 2-8 min |
-| Tier 2 (waited on Tier 1) | `nova-cloud-controller`, `placement`, `octavia`, `barbican`, `designate`, `magnum`, `openstack-dashboard` | 8-15 min |
-| Tier 3 (subordinates + plugins) | `*-mysql-router`, `*-hacluster`, `neutron-api-plugin-ovn`, `ovn-chassis`, `ovn-chassis-octavia`, `barbican-vault`, `octavia-dashboard`, `magnum-dashboard` | 15-25 min |
-| Tier 4 (downstream) | `glance-simplestreams-sync`, `octavia-diskimage-retrofit`, `designate-bind`, `ceph-radosgw` | 20-30 min |
-
-### 9.2 Final post-Vault end state
-
-When settled, every unit should be `active/idle`. Verify:
-
-```bash
-echo "=== Any unit not in active/idle? ==="
-juju status -m openstack --format=yaml \
-  | python3 -c "
-import yaml, sys
-d = yaml.safe_load(sys.stdin)
-apps = d.get('applications', {})
-issues = []
-
-def check_unit(uname, udata):
-    ws = udata.get('workload-status', {}).get('current', '')
-    js = udata.get('juju-status', {}).get('current', '')
-    msg = udata.get('workload-status', {}).get('message', '')
-    if ws != 'active' or js != 'idle':
-        issues.append(f'{uname}: workload={ws}, juju={js}, msg={msg}')
-
-for app, info in apps.items():
-    units = info.get('units', {}) or {}
-    for uname, udata in units.items():
-        check_unit(uname, udata)
-        # Walk subordinates too (hacluster, mysql-router, etc.)
-        subs = udata.get('subordinates', {}) or {}
-        for sname, sdata in subs.items():
-            check_unit(sname, sdata)
-
-print(f'Non-active/idle units: {len(issues)}')
-for i in issues:
-    print(f'  {i}')
-"
-```
-
-Expected output: `Non-active/idle units: 0`. Anything else needs investigation before the openrc-regeneration step.
-
----
-
-
-## 10. Regenerate admin-openrc
-
-Once Keystone is active/idle and Vault has issued its TLS cert, the new admin-openrc points at the new Caracal cloud.
-
-### 10.1 Pull the admin password from the keystone charm
-
-```bash
-juju run keystone/leader get-admin-password -m openstack
-# Output is YAML — operator extracts the 'admin-password' value manually OR via the Python snippet below
-```
-
-For scripted extraction:
-
-```bash
-ADMIN_PASS=$(juju run keystone/leader get-admin-password -m openstack --format json 2>/dev/null \
-  | python3 -c "
-import json, sys
-d = json.load(sys.stdin)
-# Action result format varies; try common shapes
-for k, v in d.items():
-    if isinstance(v, dict):
-        r = v.get('results', {})
-        for key in ('admin-password', 'Stdout', 'password'):
-            if key in r:
-                print(r[key].strip())
-                exit(0)
-print('', end='')
-")
-
-if [ -z "$ADMIN_PASS" ]; then
-  echo "[FAIL] could not extract admin password from action output. Run 'juju run keystone/leader get-admin-password' manually."
-else
-  echo "[OK] admin password captured (length: ${#ADMIN_PASS})"
-fi
-```
-
-### 10.2 Pull the Vault CA root for openrc trust
-
-Keystone's TLS cert is signed by Vault's internal CA. To validate that cert from the openstack CLI, we need the Vault CA root.
-
-```bash
-juju run vault/leader get-root-ca -m openstack > "$HOME/vault-init/vault-ca-root.pem"
-# The action returns the cert inline in YAML, so this file may still carry YAML wrapping (stripped in the next snippet).
-# Inspect:
-head -5 "$HOME/vault-init/vault-ca-root.pem"
-```
-
-If the output is wrapped (e.g., starts with `Running operation ...` or `unit-vault-0:`), extract just the PEM block. Common pattern:
-
-```bash
-# If the action output wraps the cert, extract just the BEGIN/END CERTIFICATE block
-python3 -c "
-import re
-with open('$HOME/vault-init/vault-ca-root.pem') as f:
-    content = f.read()
-m = re.search(r'-----BEGIN CERTIFICATE-----.*?-----END CERTIFICATE-----', content, re.DOTALL)
-if m:
-    print(m.group(0))
-else:
-    print('NO_CERT_FOUND')
-" > "$HOME/vault-init/vault-ca-root-clean.pem"
-
-mv "$HOME/vault-init/vault-ca-root-clean.pem" "$HOME/vault-init/vault-ca-root.pem"
-openssl x509 -in "$HOME/vault-init/vault-ca-root.pem" -noout -subject -dates
-# Expect: a valid cert with the Vault-charm-generated subject
-```
-
-### 10.3 Write the new admin-openrc
-
-```bash
-# Move any existing admin-openrc out of the way (the prior cycle's pointed at the destroyed cloud)
-if [ -f "$HOME/admin-openrc" ]; then
-  mv "$HOME/admin-openrc" "$HOME/admin-openrc.pre-caracal-$(date +%Y%m%d-%H%M%S)"
-fi
-
-cat > "$HOME/admin-openrc" <<EOF
-# Caracal admin openrc — VR0 DC0 Omega Cloud (v1)
-# Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) UTC
-# Source: v1-do-doc-05-vault-init §10
-export OS_AUTH_URL=https://keystone.omega.dc0.vr0.cloud.neumatrix.local:5000/v3
-export OS_USERNAME=admin
-export OS_PASSWORD='$ADMIN_PASS'
-export OS_PROJECT_NAME=admin_domain
-export OS_USER_DOMAIN_NAME=admin_domain
-export OS_PROJECT_DOMAIN_NAME=admin_domain
-export OS_IDENTITY_API_VERSION=3
-export OS_REGION_NAME=RegionOne
-export OS_CACERT=$HOME/vault-init/vault-ca-root.pem
-EOF
-
-chmod 600 "$HOME/admin-openrc"
-unset ADMIN_PASS
-
-echo "Wrote $HOME/admin-openrc — verify by sourcing and running 'openstack token issue'"
-```
-
-> **[unverified, flagging] OS_PROJECT_NAME default:** Charmed-Keystone's admin user lives in `admin_domain`. The default admin project name has varied across charm revisions — common values are `admin_domain` (matching the domain) or `admin`. If the first `openstack token issue` (§10.4) fails with a project-not-found error, try `OS_PROJECT_NAME=admin` instead.
-
-### 10.4 Verify
-
-```bash
-( source "$HOME/admin-openrc"; \
-  echo "Testing auth against $OS_AUTH_URL ..."; \
-  openstack token issue 2>&1 | head -20 )
-```
-
-Expected: a token dump (id, expires, project_id, user_id). If you get `ProjectNotFoundException`, see the flagging note above and try `OS_PROJECT_NAME=admin` in the openrc.
-
-If you get a TLS error (`certificate verify failed`), the OS_CACERT path is wrong or the cert extraction in §10.2 didn't produce a clean cert.
-
----
-
-
-## 11. /etc/hosts sanity (jumphost-side)
-
-The openrc uses FQDN `keystone.omega.dc0.vr0.cloud.neumatrix.local`. That hostname must resolve from the jumphost to the Keystone VIP (`10.12.4.229`) for openrc to work pre-Designate.
-
-```bash
-echo "=== Jumphost /etc/hosts has the API VIPs ==="
-grep -E "10\.12\.4\.(22[4-9]|23[0-6])" /etc/hosts || echo "[WARN] no API VIP hosts found in /etc/hosts"
-```
-
-If absent, add a block:
-
-```bash
-sudo tee -a /etc/hosts > /dev/null <<EOF
-
-# Caracal v1 API VIPs — v1-do-doc-05 §11
-# These are temporary until Designate zones are populated (v1-do-doc-10).
-10.12.4.224 barbican.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.226 cinder.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.227 designate.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.228 glance.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.229 keystone.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.230 magnum.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.231 neutron.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.232 nova.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.233 octavia.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.234 horizon.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.235 placement.omega.dc0.vr0.cloud.neumatrix.local
-10.12.4.236 vault.omega.dc0.vr0.cloud.neumatrix.local
-EOF
-
-# Verify
-grep "omega.dc0.vr0" /etc/hosts | wc -l
-# Expect: 12
-```
-
-This is a bootstrap measure per D-008. Tenant resolution uses Designate (set up in Batch D).
-
----
-
-
-## 12. Functional checkpoint — link to deferred verifications
-
-Now that Octavia is active/idle and the LBaaS Mgmt PKI chain is fully wired, the functional Octavia smoketest documented in `v1-do-doc-02-pki.md` §14 is ready to run. That section requires Glance to have the amphora image AND the LBaaS Mgmt network to be wired.
-
-**Glance amphora image status check:**
-
-```bash
-( source "$HOME/admin-openrc"; \
-  openstack image list --status active | grep -i amphora )
-# Expect: at least one row with name containing 'amphora'. May take 15-30 min after Octavia
-# active/idle for glance-simplestreams-sync + octavia-diskimage-retrofit to populate.
-```
-
-If the amphora image isn't present yet, defer the §14 smoketest until it appears. The pipeline:
-
-- `glance-simplestreams-sync` pulls upstream cloud images into Glance
-- `octavia-diskimage-retrofit` builds the amphora image from one of those base images, tags it as `octavia-amphora`, and pushes to Glance
-
-Both are charms with active relations; they run their pipelines automatically after Keystone is up. Just give them time.
-
-**Octavia smoketest** — execute `v1-do-doc-02-pki.md` §14 once the amphora image is present.
-
----
-
-
-## 13. Acceptance criteria — go/no-go for v1-do-doc-06 (Batch C entry)
-
-Before proceeding to Batch C:
-
-- [ ] §6 Vault init output captured AND verified stored in secure off-host location
-- [ ] §7 Vault unsealed; `vault status` shows `Sealed: false, Initialized: true`
-- [ ] §8 authorize-charm action completed; vault/0 reaches active/idle
-- [ ] §9 All charms `active/idle`; Python check returns "Non-active/idle units: 0"
-- [ ] §10 admin-openrc regenerated; `openstack token issue` succeeds
-- [ ] §11 /etc/hosts has the 12 API VIP entries
-- [ ] (Recommended) §12 amphora image present in Glance, and `v1-do-doc-02-pki.md` §14 smoketest passes
-
-If all checked, proceed to `v1-do-doc-06-magnum-domain.md` (Batch C).
-
----
-
-
-## 14. Roosevelt deltas (forward-look)
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Vault topology | num_units=1, hacluster decorative | num_units=3, hacluster active, etcd quorum operative |
-| Unseal procedure | Manual, operator types 3 keys | Auto-unseal via transit engine OR HSM-backed seal |
-| Unseal key storage | Operator-decided off-host | Formal key-escrow procedure |
-| Auto-unseal on reboot | No (host reboot → vault stays sealed → operator must re-unseal) | Yes (transit engine or HSM) |
-| admin password rotation | Manual (juju config keystone admin-password) | Vault-managed rotation |
-| /etc/hosts bootstrap | Manual (this §11) | Bastion-pre-populated; or DNS via local resolver pointed at Designate-on-management |
-| TLS trust distribution | Manual VAULT_CACERT export | Bastion preloaded with Vault root |
-
----
-
-
-## 15. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-27 | Document created. Replaces `runbooks/deprecated/03-vault-init.md` (placeholder). Covers Vault init/unseal/authorize, cert-cascade watch, admin-openrc regeneration, /etc/hosts bootstrap. Flags channel-revision uncertainty on the authorize-charm action signature and the Charmed-Keystone admin project name. | Batch B drafting |
diff --git a/runbooks/v1-do-doc-06-magnum-domain.md b/runbooks/v1-do-doc-06-magnum-domain.md
deleted file mode 100644
index bd28da6..0000000
--- a/runbooks/v1-do-doc-06-magnum-domain.md
+++ /dev/null
@@ -1,182 +0,0 @@
-# v1 Do-Document 06 — Magnum Keystone Domain Setup
-
-**Status:** First execution document of Batch C. Runs after `v1-do-doc-05-vault-init.md` (admin-openrc valid; all charms active/idle including magnum). Runs before `v1-do-doc-07-capi-bootstrap.md` (CAPI workload cluster).
-
-**Replaces:** `runbooks/deprecated/04-magnum-domain.md` (TODO-only placeholder).
-
-**Cross-references:**
-
-- D-007 Layer A (Magnum bundle deploy — done by Batch B) and Layer B (Magnum driver graft — Batch C continuation)
-- D-008 (DNS architecture — the magnum keystone trustee user is in admin_domain, not magnum domain)
-- Charmed Magnum `domain-setup` action documentation
-
----
-
-
-## 1. Purpose & scope
-
-Magnum needs a dedicated Keystone domain (`magnum`) and a service-trust user inside it. Magnum uses this domain to create per-cluster "trust" users at cluster-create time — those trust users are what cluster-internal services use to call back to OpenStack (e.g., OCCM, Cinder CSI). The `magnum` domain is conceptually a sandbox; the trust users have no privileges outside of it.
-
-**What this document does:**
-
-- Runs the Charmed Magnum action `domain-setup` on `magnum/leader`.
-- Verifies the `magnum` domain exists and is enabled.
-- Verifies the `magnum_domain_admin` (or charm-named equivalent) user exists in the magnum domain.
-- Verifies magnum unit is still active/idle after the action.
-
-**What this document does NOT do:**
-
-- Install the Magnum CAPI Helm driver — that's v1-do-doc-08 (Layer B, after CAPI bootstrap).
-- Create the `capi-mgmt` Keystone project / `capo` user / app credential — those live in `admin_domain` and are created in v1-do-doc-07 (CAPI bootstrap consumes them).
-- Create any tenant projects or cluster templates — those are tenant work (Batch D + ongoing).
-
----
-
-
-## 2. Decisions captured
-
-| Decision | Choice | Notes |
-|---|---|---|
-| Trustee domain | `magnum` (separate from `admin_domain`) | Charm default; per-cluster trust users go here |
-| Trustee admin user name | Per charm — typically `magnum_domain_admin` | Verified in §4.3 |
-| Action invocation | `juju run magnum/leader domain-setup --wait=10m` | Synchronous; 10-min timeout sufficient for testcloud |
-| Verification | API-level (`openstack domain show magnum`, `openstack user show ...`) | Direct charm-state inspection optional |
-
----
-
-
-## 3. Prerequisites
-
-| Prereq | Verification |
-|---|---|
-| `v1-do-doc-05-vault-init.md` ✓ (Vault unsealed, all charms active/idle) | `juju status magnum` shows active/idle |
-| admin-openrc points at Caracal cloud and works | `( source $HOME/admin-openrc; openstack token issue ) | head -3` returns a token |
-| Keystone reachable on its public VIP | Already confirmed by `openstack token issue` |
-
-**Shell context:**
-
-```bash
-export REPO="$HOME/openstack-caracal-ipv4"
-cd "$REPO"
-```
-
-**Confirm starting state:**
-
-```bash
-echo "=== Magnum unit status ==="
-juju status magnum -m openstack
-
-echo ""
-echo "=== Domains currently in Keystone (before domain-setup) ==="
-( source "$HOME/admin-openrc"; openstack domain list )
-# Expect: at least 'default' and 'admin_domain'. 'magnum' should NOT appear yet
-# (unless this is a re-run, in which case it's already there — see §6 for re-run posture).
-```
-
----
-
-
-## 4. Run domain-setup action
-
-### 4.1 Invoke
-
-```bash
-juju run magnum/leader domain-setup --wait=10m -m openstack
-```
-
-Expected output: the action returns within 1-3 minutes (well under the 10-min timeout). The result block should show `status: completed` and no error message.
-
-### 4.2 Verify the action succeeded
-
-```bash
-echo "=== Action result inspection ==="
-juju show-action-output --format yaml $(juju list-actions magnum -m openstack 2>/dev/null | tail -5 | awk '{print $1}' | head -1) 2>/dev/null || \
-  echo "(Use 'juju run --wait' synchronous mode — action result was visible in §4.1 output)"
-```
-
-> **Note on Juju 3.x action output:** in Juju 3.x, `juju run` returns the action result synchronously to stdout. You don't generally need a follow-up query unless investigating a failure. If §4.1 output ended in `Running operation ... with 1 task` followed by `done`, the action succeeded.
-
-### 4.3 Verify Keystone state
-
-```bash
-( source "$HOME/admin-openrc"
-
-  echo "=== magnum domain exists and is enabled ==="
-  openstack domain show magnum -f json | python3 -c "
-import json, sys
-d = json.load(sys.stdin)
-print(f'  name:    {d.get(\"name\")}')
-print(f'  enabled: {d.get(\"enabled\")}')
-print(f'  id:      {d.get(\"id\")}')
-print(f'  desc:    {d.get(\"description\")}')
-"
-
-  echo ""
-  echo "=== Users in magnum domain ==="
-  openstack user list --domain magnum
-
-  echo ""
-  echo "=== Projects in magnum domain (likely empty — trust projects are per-cluster) ==="
-  openstack project list --domain magnum
-)
-```
-
-**Expected:**
-
-- `magnum` domain present, `enabled: True`.
-- At least one user in the magnum domain. Charm default name is `magnum_domain_admin` (verify exact name in output).
-- Projects list may be empty at this stage — Magnum creates per-cluster trust projects on demand at cluster-create time.
-
-### 4.4 Verify charm state
-
-```bash
-echo "=== Magnum unit status after domain-setup ==="
-juju status magnum -m openstack
-# Expect: magnum/0 still active/idle. The domain-setup action should not have moved it out of active.
-
-echo ""
-echo "=== Action history for magnum (last 5) ==="
-juju list-actions magnum -m openstack 2>/dev/null | tail -10
-```
-
----
-
-
-## 5. Acceptance criteria — go/no-go for v1-do-doc-07-capi-bootstrap
-
-- [ ] `juju run magnum/leader domain-setup --wait=10m` completed without error
-- [ ] `openstack domain show magnum` returns `enabled: True`
-- [ ] `openstack user list --domain magnum` returns at least one user (the trustee admin)
-- [ ] `juju status magnum` still shows magnum/0 in active/idle
-
-If all checked, proceed to `v1-do-doc-07-capi-bootstrap.md`.
-
----
-
-
-## 6. Re-run posture
-
-The `domain-setup` action is idempotent at the charm level — re-running it does not duplicate the domain or create extra users. The most common reason to re-run is if the charm was reconfigured or upgraded; the action re-ensures the domain state matches what the charm expects.
-
-If §4.3 shows the magnum domain already exists from a prior run, the action will report `completed` with no work performed — that's the expected behavior.
-
----
-
-
-## 7. Roosevelt deltas (forward-look)
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Domain name | `magnum` (charm default) | Same |
-| Trustee user creation | Charm action `domain-setup` | Same — but Roosevelt may layer Vault PKI for trustee credentials |
-| Action invocation | Manual `juju run` | Can be wrapped in a deploy script with idempotency check |
-| Verification depth | Domain + user check (this doc §4.3) | Plus Vault audit trail confirming the trustee was created with correct permissions |
-
----
-
-
-## 8. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-27 | Document created. Replaces placeholder runbook 04 (TODO-only). | Batch C drafting |
diff --git a/runbooks/v1-do-doc-07-capi-bootstrap.md b/runbooks/v1-do-doc-07-capi-bootstrap.md
deleted file mode 100644
index 3c3e893..0000000
--- a/runbooks/v1-do-doc-07-capi-bootstrap.md
+++ /dev/null
@@ -1,1085 +0,0 @@
-# v1 Do-Document 07 — CAPI Bootstrap Cluster + Workload Pivot
-
-**Status:** Second execution document of Batch C. Stands up the CAPI bootstrap cluster on `capi-mgmt.maas`, creates the workload cluster on the cloud, pivots cluster state into the workload via `clusterctl move`, and stages the workload kubeconfig for v1-do-doc-08.
-
-**Position in sequence:** Runs after `v1-do-doc-06-magnum-domain.md` (Magnum domain setup). Runs before `v1-do-doc-08-magnum-driver.md` (driver graft consumes the workload kubeconfig produced here).
-
-**Replaces:** `runbooks/04a-capi-bootstrap-cluster.md` — same substantive procedure with fixes applied. The old runbook moves to `runbooks/deprecated/` as part of this batch's commits.
-
-**Fixes applied vs the prior runbook (`runbooks/04a-capi-bootstrap-cluster.md`):**
-
-- `$REPO` corrected from `$HOME/repos/openstack-caracal-ipv4` to `$HOME/openstack-caracal-ipv4`
-- `$VAULT_CA` corrected from `$HOME/vault-pki/root-ca.pem` to `$HOME/vault-init/vault-ca-root.pem` (matches v1-do-doc-05 §10.2 output)
-- `$MAAS_PROFILE` now explicitly set in §3 shell context (prior version referenced it without setting it)
-- §4 adds `KUBERNETES_VERSION` to dynamic pin discovery (was hardcoded `v1.31.4` in §13)
-- §5 MAAS deploy poll's `exit 1` on Failed-deployment converted to non-exiting `[FAIL]` report
-- §11 noble-amd64-missing branch's `exit 1` converted to non-exiting `[FAIL]` report
-- Cross-references updated: "runbook 02" → v1-do-doc-04; "runbook 03" → v1-do-doc-05; "runbook 04" → v1-do-doc-06; "runbook 05" → v1-do-doc-08
-
-**Cross-references:**
-
-- D-017 (CAPI bootstrap cluster lifecycle — full rebuild every cycle)
-- D-007 (Magnum two-layer install — this is Layer B preparation)
-- D-002 (channel matrix — informs Vault CA chain)
-- Workstream 3b decision (2026-05-22): ship Vault CA (no tls-insecure); pivot mandatory
-
----
-
-
-## 1. Purpose & scope
-
-Stand up the CAPI bootstrap cluster on `capi-mgmt.maas` and pivot cluster state into a self-managing workload cluster. Output:
-
-1. **Workload K8s cluster** (`capi-mgmt-cluster`) running in tenant VMs on the cloud, self-managing post-pivot.
-2. **Workload kubeconfig** copied to jumphost at a known path. Consumed by `v1-do-doc-08-magnum-driver.md` for the Magnum CAPI Helm driver graft.
-3. **No remaining state** on the bootstrap k3s VM after pivot. capi-mgmt becomes a disposable jump host.
-
-**D-017 posture:** L3 full teardown and rebuild every deployment cycle. Nothing is preserved across cycles. capi-mgmt is wiped to MAAS Ready on teardown; rebuilt from scratch by this runbook.
-
-**Scope:** v1 testcloud. Roosevelt deltas in §20.
-
-**Out of scope:**
-
-- Magnum-side configuration (v1-do-doc-08)
-- Workload cluster's tenant lifecycle (Magnum's job, not this runbook's)
-- Backup / DR for the workload cluster (Roosevelt concern)
-
----
-
-
-## 2. Decisions captured
-
-Per workstream 3b sign-off (2026-05-22):
-
-| Decision | Choice | Roosevelt parallel |
-|---|---|---|
-| Version pinning | Pin-at-execution with discovery in §4 | Same pattern; pins captured in deploy record |
-| Cloud TLS trust | Ship Vault CA to capi-mgmt + workload nodes (no `tls-insecure`) | Image-baked CA; CK8sConfig redundancy |
-| `clusterctl move` pivot | Mandatory; workload cluster becomes self-managing | Same |
-| K8s flavor | Canonical Kubernetes (CK8s) | Same |
-| OpenStack auth | v3applicationcredential | Same |
-| Pod CIDR | `10.244.0.0/16` | Same (no conflict with cloud `10.12.0.0/16` or tenant pool `10.20.0.0/16`) |
-| Service CIDR | `10.96.0.0/12` | Same |
-| Workload cluster name | `capi-mgmt-cluster` | Same |
-| Workload node SSH user | `ubuntu` (MAAS/cloud-init convention) | Same |
-
-**Naming convention:**
-
-- Keystone project for CAPI: `capi-mgmt` (in `admin_domain`)
-- Keystone user for CAPI: `capo` (CAPO operator)
-- App credential: `capo-app-cred`
-- Workload image (Glance): `noble-amd64` (do NOT duplicate as `ubuntu-24.04-capi` — Bobcat lesson)
-- Workload flavor: `capi-mgmt-node` (4 vCPU / 4 GiB / 30 GB) — control plane node sizing
-
----
-
-
-## 3. Prerequisites
-
-| Prereq | Verification |
-|---|---|
-| Cloud deployed; all charms `active/idle` per D-011 | `juju status --color \| grep -v "active.*idle"` returns only the header |
-| Vault initialized + unsealed (v1-do-doc-05) | `juju ssh vault/leader -- sudo vault status` shows `Sealed=false` |
-| Vault root CA available on jumphost | `test -f $HOME/vault-init/vault-ca-root.pem && openssl x509 -in $HOME/vault-init/vault-ca-root.pem -noout -subject` |
-| Keystone reachable via FQDN | `curl -sf --cacert $HOME/vault-init/vault-ca-root.pem https://keystone.omega.dc0.vr0.cloud.neumatrix.local:5000/v3 \| jq .version.id` returns `"v3.14"` or current |
-| Magnum domain set up (v1-do-doc-06) | `( source $HOME/admin-openrc; openstack domain show magnum -f value -c enabled )` returns `True` |
-| capi-mgmt VM exists in MAAS as Ready | `maas $MAAS_PROFILE machines read \| jq '.[] \| select(.hostname=="capi-mgmt") \| .status_name'` returns `"Ready"` |
-| Admin openrc available | `test -f $HOME/admin-openrc && ( source $HOME/admin-openrc && openstack token issue \| head -3 )` |
-| Workspace path under $HOME (snap confinement) | `WORK=$HOME/capi-bootstrap; mkdir -p "$WORK"; cd "$WORK"; pwd` shows under home |
-
-**Set shell context for the runbook:**
-
-```bash
-export REPO="$HOME/openstack-caracal-ipv4"
-export WORK="$HOME/capi-bootstrap"
-export VAULT_CA="$HOME/vault-init/vault-ca-root.pem"
-export CAPI_MGMT_METAL_IP=10.12.8.21
-export CAPI_MGMT_PROVIDER_IP=10.12.4.21
-export CLUSTER_NAME=capi-mgmt-cluster
-export MAAS_PROFILE=$(maas list 2>/dev/null | awk 'NR==1 {print $1}')
-
-mkdir -p "$WORK"
-cd "$WORK"
-
-# Sanity-check setup
-echo "REPO=$REPO"
-echo "WORK=$WORK"
-echo "VAULT_CA=$VAULT_CA"
-echo "MAAS_PROFILE=$MAAS_PROFILE"
-test -f "$VAULT_CA" && echo "[OK] Vault CA present" || echo "[FAIL] Vault CA missing"
-test -n "$MAAS_PROFILE" && echo "[OK] MAAS_PROFILE set" || echo "[FAIL] MAAS_PROFILE empty — run 'maas login' first"
-```
-
----
-
-
-## 4. Version discovery (set pins)
-
-Pin-at-execution with discovery procedure documented inline so each rebuild's pins are reproducible AND traceable.
-
-**GitHub API: authenticated vs unauthenticated.** Unauthenticated requests are limited to 60 per hour; authenticated requests to 5,000 per hour. For multiple rebuilds in a day, set a token:
-
-```bash
-# Optional but recommended — avoids rate-limit headaches during rebuild
-export GITHUB_TOKEN=<your-PAT-with-public_repo-read>
-# Or skip if you can tolerate ~10 API calls slowly
-```
-
-**Discover current stable releases:**
-
-```bash
-cd "$WORK"
-
-# Helper: fetch latest stable release tag from a GitHub repo
-gh_latest() {
-  # Print the latest release tag of GitHub repo $1 (owner/name); sends $GITHUB_TOKEN as a bearer token when set.
-  local repo=$1 auth=()   # array: the header must reach curl as ONE argument, not four split words
-  [ -n "${GITHUB_TOKEN:-}" ] && auth=(-H "Authorization: Bearer $GITHUB_TOKEN")
-  curl -sfL "${auth[@]}" "https://api.github.com/repos/$repo/releases/latest" \
-    | jq -r '.tag_name'
-}
-
-# Pin captures (one file per pin)
-mkdir -p pins
-gh_latest "kubernetes-sigs/cluster-api"                | tee pins/CAPI_VERSION
-gh_latest "kubernetes-sigs/cluster-api-provider-openstack" | tee pins/CAPO_VERSION
-gh_latest "canonical/cluster-api-k8s"                  | tee pins/CK8S_VERSION
-gh_latest "cert-manager/cert-manager"                  | tee pins/CERT_MANAGER_VERSION
-gh_latest "k-orc/openstack-resource-controller"        | tee pins/ORC_VERSION
-gh_latest "k3s-io/k3s"                                 | tee pins/K3S_VERSION
-gh_latest "helm/helm"                                  | tee pins/HELM_VERSION
-
-# Load into shell
-export CAPI_VERSION=$(cat pins/CAPI_VERSION)
-export CAPO_VERSION=$(cat pins/CAPO_VERSION)
-export CK8S_VERSION=$(cat pins/CK8S_VERSION)
-export CERT_MANAGER_VERSION=$(cat pins/CERT_MANAGER_VERSION)
-export ORC_VERSION=$(cat pins/ORC_VERSION)
-export K3S_VERSION=$(cat pins/K3S_VERSION)
-export HELM_VERSION=$(cat pins/HELM_VERSION)
-```
-
-**Discover Kubernetes version supported by the pinned CK8s release:**
-
-The CK8s release publishes a `metadata.yaml` alongside its components that names the Kubernetes versions it supports. Discover the latest supported patch:
-
-```bash
-gh_supported_k8s() {
-  # Echo the highest v1.X.Y string found in the metadata.yaml asset of CK8s release $1 (prints nothing if none).
-  local release=$1
-  local asset="https://github.com/canonical/cluster-api-k8s/releases/download/${release}/metadata.yaml"
-  curl -sfL "$asset" 2>/dev/null \
-    | grep -oE "v1\.[0-9]+\.[0-9]+" | sort -uV | tail -1
-}
-
-KUBERNETES_VERSION=$(gh_supported_k8s "$CK8S_VERSION")
-
-if [ -z "$KUBERNETES_VERSION" ]; then
-  echo "[WARN] could not auto-discover k8s version for CK8s $CK8S_VERSION via metadata.yaml"
-  echo "        Consult release notes at: https://github.com/canonical/cluster-api-k8s/releases/tag/$CK8S_VERSION"
-  echo "        Then set manually: export KUBERNETES_VERSION=v1.X.Y"
-  echo "        (Re-run rest of §4 after setting.)"
-else
-  echo "[OK] Discovered KUBERNETES_VERSION=$KUBERNETES_VERSION for CK8s=$CK8S_VERSION"
-  echo "$KUBERNETES_VERSION" > pins/KUBERNETES_VERSION
-  export KUBERNETES_VERSION
-fi
-
-# Display for the deploy log
-echo ""
-echo "=== Pinned versions ==="
-for f in pins/*_VERSION; do
-  printf "%-30s %s\n" "$(basename "$f")" "$(cat "$f")"
-done
-```
-
-**Sanity check:** all values should look like `v1.X.Y` or `v0.X.Y`. If any returned `null` or empty, the GitHub API call failed — most likely rate-limited. Wait an hour or set `$GITHUB_TOKEN` and retry.
-
-**Capture pins to deploy record:**
-
-```bash
-DEPLOY_RECORD=$HOME/deploy-records/$(date +%Y%m%d-%H%M%S)/capi-pins
-mkdir -p "$DEPLOY_RECORD"
-cp pins/*_VERSION "$DEPLOY_RECORD/"
-ls -la "$DEPLOY_RECORD/"
-```
-
----
-
-
-## 5. MAAS-deploy capi-mgmt
-
-Prerequisite: capi-mgmt MAAS machine is in `Ready` state (see §3). Network config in MAAS:
-
-- **eth0** on metal fabric, DHCP → `10.12.8.21` (MAAS-pinned static lease)
-- **eth1** on provider fabric, static → `10.12.4.21`
-
-Deploy Ubuntu 24.04 (Noble):
-
-```bash
-# Get the capi-mgmt system_id from MAAS
-CAPI_MGMT_SYSTEM_ID=$(maas $MAAS_PROFILE machines read \
-  | jq -r '.[] | select(.hostname=="capi-mgmt") | .system_id')
-echo "capi-mgmt system_id: $CAPI_MGMT_SYSTEM_ID"
-
-# Deploy
-maas $MAAS_PROFILE machine deploy "$CAPI_MGMT_SYSTEM_ID" \
-  distro_series=noble \
-  hwe_kernel=ga-24.04
-```
-
-**Poll for `Deployed`:**
-
-```bash
-DEPLOY_OK=1
-for i in $(seq 1 60); do
-  STATUS=$(maas $MAAS_PROFILE machine read "$CAPI_MGMT_SYSTEM_ID" | jq -r '.status_name')
-  echo "$(date -Is) capi-mgmt status: $STATUS"
-  if [ "$STATUS" = "Deployed" ]; then
-    echo "[OK] capi-mgmt Deployed"
-    DEPLOY_OK=0
-    break
-  fi
-  if [ "$STATUS" = "Failed deployment" ]; then
-    echo "[FAIL] capi-mgmt deployment failed — STOP here, investigate via MAAS UI before continuing"
-    DEPLOY_OK=2
-    break
-  fi
-  sleep 30
-done
-
-if [ "$DEPLOY_OK" -ne 0 ]; then
-  echo "[FAIL] poll exited without a clean Deployed state. STATUS=$STATUS. Stop and investigate."
-fi
-```
-
-Typical deploy time: 5-8 minutes on this hardware.
-
-**SSH reachability:**
-
-```bash
-# MAAS .maas zone may not resolve from jumphost — use IP directly per handoff lessons
-ssh -o StrictHostKeyChecking=accept-new ubuntu@$CAPI_MGMT_METAL_IP -- hostname
-# Expect: capi-mgmt
-```
-
-> **Gotcha:** MAAS-deployed Ubuntu uses the `ubuntu` user, not `jessea123`. See handoff "recurring technical pitfalls."
-
----
-
-
-## 6. SSH bootstrap + Vault CA install
-
-On the jumphost, prepare a transport bundle of essentials:
-
-```bash
-mkdir -p "$WORK/bootstrap-bundle"
-cp "$VAULT_CA" "$WORK/bootstrap-bundle/vault-ca.crt"
-chmod 644 "$WORK/bootstrap-bundle/vault-ca.crt"
-
-# Bundle pin files so capi-mgmt can read versions
-cp -r "$WORK/pins" "$WORK/bootstrap-bundle/"
-```
-
-SCP and install Vault CA on capi-mgmt:
-
-```bash
-scp -r "$WORK/bootstrap-bundle" ubuntu@$CAPI_MGMT_METAL_IP:/home/ubuntu/
-
-ssh ubuntu@$CAPI_MGMT_METAL_IP <<'EOF'
-set -euo pipefail
-
-# Install Vault CA as a system-trusted root
-sudo cp /home/ubuntu/bootstrap-bundle/vault-ca.crt /usr/local/share/ca-certificates/
-sudo update-ca-certificates 2>&1 | tail -3
-
-# Verify
-openssl s_client -connect keystone.omega.dc0.vr0.cloud.neumatrix.local:5000 \
-  -CApath /etc/ssl/certs -verify_return_error </dev/null 2>&1 \
-  | grep -E "(Verify return code|subject=)" || \
-  { echo "TLS chain verify failed against Keystone — investigate before proceeding"; exit 1; }
-
-# Update apt + base utilities
-sudo apt-get update -qq
-sudo apt-get install -y -qq jq curl yq
-
-# Confirm
-which jq curl yq
-EOF
-```
-
-**Expected:**
-
-- `update-ca-certificates` reports "1 added"
-- `openssl s_client` shows `Verify return code: 0 (ok)` and a Keystone cert whose chain terminates at the Vault CA
-
-> **Why this matters:** Bobcat used `tls-insecure=true` in cloud.conf which skipped this entire trust path. Our workstream 3b decision (ship Vault CA) means OCCM and CAPO will validate certs against this trust store. If TLS verify fails here, OCCM will crashloop later.
-
-> **`exit 1` inside ssh heredoc:** the heredoc body runs on the remote host inside its own bash session. `exit 1` there exits the REMOTE session, propagating a non-zero exit back to the local ssh — it does NOT kill the operator's shell.
-
----
-
-
-## 7. k3s install
-
-On capi-mgmt:
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP "K3S_VERSION=$K3S_VERSION CAPI_MGMT_METAL_IP=$CAPI_MGMT_METAL_IP bash -s" <<'REMOTE_EOF'
-set -euo pipefail
-
-# Install k3s with explicit bind/advertise/SAN flags
-curl -sfL https://get.k3s.io | \
-  INSTALL_K3S_VERSION="$K3S_VERSION" \
-  sh -s - server \
-    --bind-address="$CAPI_MGMT_METAL_IP" \
-    --advertise-address="$CAPI_MGMT_METAL_IP" \
-    --node-ip="$CAPI_MGMT_METAL_IP" \
-    --tls-san="$CAPI_MGMT_METAL_IP" \
-    --tls-san=capi-mgmt.maas \
-    --write-kubeconfig-mode=0644 \
-    --disable=traefik
-
-# Wait for the node to report Ready (-w = whole word, so "NotReady" does not end the wait early)
-for i in $(seq 1 30); do
-  if sudo kubectl get nodes 2>/dev/null | grep -qw "Ready"; then
-    echo "k3s ready"; break
-  fi
-  echo "Waiting for k3s API... ($i/30)"
-  sleep 5
-done
-
-sudo kubectl get nodes
-sudo kubectl get pods -A
-REMOTE_EOF
-```
-
-> **Gotcha:** `--bind-address=$IP` makes k3s listen ONLY on that IP — not also on 127.0.0.1. The default kubeconfig at `/etc/rancher/k3s/k3s.yaml` has `server: https://127.0.0.1:6443` and will NOT work as-is. Sed-rewrite below.
-
----
-
-
-## 8. Kubeconfig server-URL rewrite
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP "CAPI_MGMT_METAL_IP=$CAPI_MGMT_METAL_IP bash -s" <<'REMOTE_EOF'
-set -euo pipefail
-
-# Copy k3s kubeconfig to ubuntu user; rewrite server URL
-mkdir -p /home/ubuntu/.kube
-sudo cp /etc/rancher/k3s/k3s.yaml /home/ubuntu/.kube/config
-sudo chown ubuntu:ubuntu /home/ubuntu/.kube/config
-chmod 600 /home/ubuntu/.kube/config
-
-# Rewrite 127.0.0.1 → metal IP
-sed -i "s|server: https://127.0.0.1:6443|server: https://$CAPI_MGMT_METAL_IP:6443|" \
-  /home/ubuntu/.kube/config
-
-# Verify rewrite
-grep "server:" /home/ubuntu/.kube/config
-# Expect: server: https://10.12.8.21:6443
-
-# Confirm kubectl works as ubuntu user (no sudo)
-kubectl get nodes
-REMOTE_EOF
-```
-
----
-
-
-## 9. helm + clusterctl install
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP "HELM_VERSION=$HELM_VERSION CAPI_VERSION=$CAPI_VERSION bash -s" <<'REMOTE_EOF'
-set -euo pipefail
-
-# helm install (get-helm-3 fetches the version we specify)
-cd /tmp
-curl -sfL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \
-  | DESIRED_VERSION="$HELM_VERSION" bash
-helm version --short
-
-# clusterctl install
-CLUSTERCTL_URL="https://github.com/kubernetes-sigs/cluster-api/releases/download/${CAPI_VERSION}/clusterctl-linux-amd64"
-sudo curl -sfL "$CLUSTERCTL_URL" -o /usr/local/bin/clusterctl
-sudo chmod +x /usr/local/bin/clusterctl
-clusterctl version
-REMOTE_EOF
-```
-
----
-
-
-## 10. clusterctl init (CAPI controllers + cert-manager + ORC + CAPO + CK8s)
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP "CK8S_VERSION=$CK8S_VERSION CERT_MANAGER_VERSION=$CERT_MANAGER_VERSION ORC_VERSION=$ORC_VERSION CAPO_VERSION=$CAPO_VERSION CAPI_VERSION=$CAPI_VERSION bash -s" <<'REMOTE_EOF'
-set -euo pipefail
-
-# Configure clusterctl with provider URLs
-mkdir -p ~/.cluster-api
-cat > ~/.cluster-api/clusterctl.yaml <<EOF
-providers:
-  - name: "canonical-kubernetes"
-    url: "https://github.com/canonical/cluster-api-k8s/releases/${CK8S_VERSION}/bootstrap-components.yaml"
-    type: "BootstrapProvider"
-  - name: "canonical-kubernetes"
-    url: "https://github.com/canonical/cluster-api-k8s/releases/${CK8S_VERSION}/control-plane-components.yaml"
-    type: "ControlPlaneProvider"
-EOF
-
-# Initialize CAPI with explicit versions
-clusterctl init \
-  --core "cluster-api:${CAPI_VERSION}" \
-  --infrastructure "openstack:${CAPO_VERSION}" \
-  --bootstrap "canonical-kubernetes:${CK8S_VERSION}" \
-  --control-plane "canonical-kubernetes:${CK8S_VERSION}" \
-  --cert-manager-version "${CERT_MANAGER_VERSION}"
-
-# Wait for controllers to be Ready
-kubectl wait --for=condition=Available --timeout=5m deployment --all -n capi-system
-kubectl wait --for=condition=Available --timeout=5m deployment --all -n capi-kubeadm-bootstrap-system 2>/dev/null || true
-kubectl wait --for=condition=Available --timeout=5m deployment --all -n capo-system
-kubectl wait --for=condition=Available --timeout=5m deployment --all -n cert-manager
-
-# Install ORC (GitHub serves release assets under releases/download/<tag>/; ORC publishes its manifest as install.yaml)
-kubectl apply -f "https://github.com/k-orc/openstack-resource-controller/releases/download/${ORC_VERSION}/install.yaml"
-kubectl wait --for=condition=Available --timeout=5m deployment --all -n orc-system
-
-# List pods that are not Running/Completed; `|| true` because grep exits 1 on the expected empty result, which `set -euo pipefail` would report as a failed run
-kubectl get pods -A | grep -v "Running\|Completed" | grep -v NAME || true
-# Expected: empty output (all pods Running or no abnormal state)
-REMOTE_EOF
-```
-
-> **Gotcha:** the actual namespace names (`capi-system`, `capo-system`, etc.) are conventions. If a controller fails to land in the expected namespace, `kubectl get deployment -A` lists all deployments — diagnose from there.
-
----
-
-
-## 11. Cloud-side prep (Keystone, Nova, Glance)
-
-Back on the jumphost:
-
-```bash
-source $HOME/admin-openrc
-
-# Inventory existing resources FIRST (Bobcat lesson: don't create duplicates)
-echo "=== Existing images ==="
-openstack image list -c ID -c Name -f json | jq -r '.[] | "\(.Name)\t\(.ID)"'
-echo ""
-echo "=== Existing flavors ==="
-openstack flavor list -c Name -c ID -c RAM -c VCPUs -c Disk -f json \
-  | jq -r '.[] | "\(.Name)\tRAM=\(.RAM)\tCPU=\(.VCPUs)\tDisk=\(.Disk)\tID=\(.ID)"'
-echo ""
-echo "=== Existing keypairs ==="
-openstack keypair list
-echo ""
-echo "=== Existing projects in admin_domain ==="
-openstack project list --domain admin_domain
-```
-
-**Create / verify resources:**
-
-```bash
-# Keystone project + user
-openstack project show capi-mgmt --domain admin_domain 2>/dev/null \
-  || openstack project create capi-mgmt --domain admin_domain --description "CAPI management plane"
-
-openstack user show capo --domain admin_domain 2>/dev/null \
-  || openstack user create capo --domain admin_domain --password-prompt --description "CAPO operator"
-
-# Role assignments (CAPO needs member + load-balancer_member at minimum;
-# admin works for testcloud — Roosevelt should use least-privilege)
-openstack role add --user capo --user-domain admin_domain \
-  --project capi-mgmt --project-domain admin_domain \
-  member
-
-openstack role add --user capo --user-domain admin_domain \
-  --project capi-mgmt --project-domain admin_domain \
-  load-balancer_member 2>/dev/null || \
-  echo "(load-balancer_member role may not exist if Octavia not deployed yet)"
-
-# Application credential — captured to file under $HOME (snap confinement)
-APP_CRED_FILE=$WORK/capo-app-cred.json
-openstack --os-username capo --os-user-domain-name admin_domain \
-          --os-project-name capi-mgmt --os-project-domain-name admin_domain \
-  application credential create capo-app-cred \
-  --description "CAPO operator app credential" \
-  -f json > "$APP_CRED_FILE"
-chmod 600 "$APP_CRED_FILE"
-
-# Extract credential ID + secret
-export APP_CRED_ID=$(jq -r '.id' "$APP_CRED_FILE")
-export APP_CRED_SECRET=$(jq -r '.secret' "$APP_CRED_FILE")
-echo "App cred ID: $APP_CRED_ID"
-```
-
-**Nova keypair (workload node SSH key):**
-
-```bash
-# Generate fresh keypair locally (do NOT reuse jumphost personal key)
-ssh-keygen -t ed25519 -N '' -f "$WORK/capi-workload-key" \
-  -C "capi-workload-$(date +%Y%m%d)"
-chmod 600 "$WORK/capi-workload-key"
-
-# Register the public key with Nova as keypair capi-workload-key. NOTE(review): this runs with the sourced admin credentials — confirm the capo app credential can reference the keypair
-openstack keypair create --public-key "$WORK/capi-workload-key.pub" capi-workload-key
-openstack keypair show capi-workload-key
-```
-
-**Workload image:**
-
-```bash
-# Inventory check — use noble-amd64 if it exists (Bobcat lesson: do NOT create ubuntu-24.04-capi as a dup)
-NOBLE_IMAGE_ID=$(openstack image show noble-amd64 -c id -f value 2>/dev/null || echo "")
-
-if [ -z "$NOBLE_IMAGE_ID" ]; then
-  echo "[FAIL] noble-amd64 image not found in Glance. Upload required before proceeding:"
-  echo ""
-  echo "  wget https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img -O $WORK/noble-server-cloudimg-amd64.img"
-  echo "  openstack image create --disk-format qcow2 --container-format bare \\"
-  echo "    --public --file $WORK/noble-server-cloudimg-amd64.img noble-amd64"
-  echo ""
-  echo "  Then re-run this section."
-else
-  echo "[OK] Using image: noble-amd64 ($NOBLE_IMAGE_ID)"
-  export WORKLOAD_IMAGE_ID=$NOBLE_IMAGE_ID
-fi
-```
-
-If the image was missing, the rest of §11 cannot complete. Stop here, upload the image, and rerun §11 from the top.
-
-**Workload flavor:**
-
-```bash
-openstack flavor show capi-mgmt-node 2>/dev/null \
-  || openstack flavor create capi-mgmt-node \
-       --vcpus 4 --ram 4096 --disk 30 \
-       --description "CAPI workload node (control plane sizing)"
-
-export WORKLOAD_FLAVOR=capi-mgmt-node
-```
-
----
-
-
-## 12. clouds.yaml + cloud.conf composition (with Vault CA, no tls-insecure)
-
-The workload cluster's OCCM (OpenStack Cloud Controller Manager) and CAPO both need to call OpenStack APIs. Two files:
-
-- `clouds.yaml` — CAPO's view of how to reach OpenStack (used at cluster creation time on capi-mgmt)
-- `cloud.conf` — OCCM's view, injected into the workload cluster's k8s Secret (used continuously by OCCM running in the workload cluster)
-
-**Compose clouds.yaml:**
-
-```bash
-cat > "$WORK/clouds.yaml" <<EOF
-clouds:
-  capi-mgmt:
-    region_name: RegionOne
-    interface: public
-    identity_api_version: 3
-    auth_type: v3applicationcredential
-    auth:
-      auth_url: https://keystone.omega.dc0.vr0.cloud.neumatrix.local:5000/v3
-      application_credential_id: $APP_CRED_ID
-      application_credential_secret: $APP_CRED_SECRET
-    cacert: /usr/local/share/ca-certificates/vault-ca.crt
-    verify: true
-EOF
-chmod 600 "$WORK/clouds.yaml"
-
-# base64-encode for cluster template embedding (no newline wrapping)
-base64 -w0 "$WORK/clouds.yaml" > "$WORK/clouds.yaml.b64"
-```
-
-**Compose cloud.conf** (INI format, NOT YAML):
-
-```bash
-cat > "$WORK/cloud.conf" <<EOF
-[Global]
-auth-url=https://keystone.omega.dc0.vr0.cloud.neumatrix.local:5000/v3
-application-credential-id=$APP_CRED_ID
-application-credential-secret=$APP_CRED_SECRET
-region=RegionOne
-domain-name=admin_domain
-ca-file=/usr/local/share/ca-certificates/vault-ca.crt
-
-[LoadBalancer]
-use-octavia=true
-EOF
-chmod 600 "$WORK/cloud.conf"
-
-base64 -w0 "$WORK/cloud.conf" > "$WORK/cloud.conf.b64"
-```
-
-> **Critical delta from Bobcat:** the `ca-file` line replaces `tls-insecure=true`. The path `/usr/local/share/ca-certificates/vault-ca.crt` exists on capi-mgmt (from §6) AND will be injected into workload nodes via CK8sConfig in §13.
-
-**base64-encode Vault CA for CK8sConfig injection:**
-
-```bash
-base64 -w0 "$VAULT_CA" > "$WORK/vault-ca.crt.b64"
-wc -c "$WORK/vault-ca.crt.b64"
-```
-
----
-
-
-## 13. Cluster template rendering (with Vault CA injection)
-
-The cluster template defines: Cluster, OpenStackCluster, CK8sControlPlane, CK8sConfigTemplate (control plane + workers), MachineDeployment, Secrets for clouds.yaml and cloud.conf.
-
-Variables (18 total):
-
-```bash
-export CLUSTER_NAME=capi-mgmt-cluster
-export CLUSTER_NAMESPACE=default
-# KUBERNETES_VERSION was discovered in §4; verify it's set
-test -n "$KUBERNETES_VERSION" || { echo "[FAIL] KUBERNETES_VERSION not set; rerun §4 discovery"; }
-echo "Using KUBERNETES_VERSION=$KUBERNETES_VERSION"
-export CONTROL_PLANE_MACHINE_COUNT=1           # 3 for HA on Roosevelt
-export WORKER_MACHINE_COUNT=2                  # 3 on Roosevelt
-export OPENSTACK_DNS_NAMESERVERS=1.1.1.1,1.0.0.1   # public DNS, per D-019 (Designate deferred to v2)
-export OPENSTACK_FAILURE_DOMAIN=nova
-export OPENSTACK_EXTERNAL_NETWORK_ID=$(openstack network show ext_net -c id -f value)
-export OPENSTACK_IMAGE_NAME=noble-amd64
-export OPENSTACK_FLAVOR=capi-mgmt-node
-export OPENSTACK_SSH_KEY_NAME=capi-workload-key
-export POD_CIDR=10.244.0.0/16
-export SERVICE_CIDR=10.96.0.0/12
-export CLOUDS_YAML_B64=$(cat "$WORK/clouds.yaml.b64")
-export CLOUD_CONF_B64=$(cat "$WORK/cloud.conf.b64")
-export VAULT_CA_B64=$(cat "$WORK/vault-ca.crt.b64")
-export CLUSTER_DOMAIN=cluster.local
-export OPENSTACK_CLOUD=capi-mgmt
-
-# Sanity print
-env | grep -E "^(CLUSTER|KUBERNETES|CONTROL_PLANE|WORKER|OPENSTACK|POD|SERVICE|VAULT|CLOUD)" \
-  | grep -v "B64\|SECRET\|PASS" | sort
-```
-
-**Render the cluster template:**
-
-```bash
-cat > "$WORK/cluster-template.yaml" <<'TEMPLATE_EOF'
-apiVersion: v1
-kind: Secret
-metadata:
-  name: ${CLUSTER_NAME}-cloud-config
-  namespace: ${CLUSTER_NAMESPACE}
-type: Opaque
-data:
-  clouds.yaml: ${CLOUDS_YAML_B64}
-  cloud.conf: ${CLOUD_CONF_B64}
-  cacert: ${VAULT_CA_B64}
----
-apiVersion: cluster.x-k8s.io/v1beta1
-kind: Cluster
-metadata:
-  name: ${CLUSTER_NAME}
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  clusterNetwork:
-    pods:
-      cidrBlocks:
-        - ${POD_CIDR}
-    services:
-      cidrBlocks:
-        - ${SERVICE_CIDR}
-    serviceDomain: ${CLUSTER_DOMAIN}
-  infrastructureRef:
-    apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-    kind: OpenStackCluster
-    name: ${CLUSTER_NAME}
-  controlPlaneRef:
-    apiVersion: controlplane.cluster.x-k8s.io/v1beta2
-    kind: CK8sControlPlane
-    name: ${CLUSTER_NAME}-control-plane
----
-apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-kind: OpenStackCluster
-metadata:
-  name: ${CLUSTER_NAME}
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  identityRef:
-    name: ${CLUSTER_NAME}-cloud-config
-    cloudName: ${OPENSTACK_CLOUD}
-  externalNetwork:
-    id: ${OPENSTACK_EXTERNAL_NETWORK_ID}
-  managedSecurityGroups:
-    allowAllInClusterTraffic: true
-  apiServerLoadBalancer:
-    enabled: true
----
-apiVersion: controlplane.cluster.x-k8s.io/v1beta2
-kind: CK8sControlPlane
-metadata:
-  name: ${CLUSTER_NAME}-control-plane
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  replicas: ${CONTROL_PLANE_MACHINE_COUNT}
-  version: ${KUBERNETES_VERSION}
-  machineTemplate:
-    infrastructureTemplate:
-      apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-      kind: OpenStackMachineTemplate
-      name: ${CLUSTER_NAME}-control-plane
-  spec:
-    files:
-      - path: /usr/local/share/ca-certificates/vault-ca.crt
-        owner: root:root
-        permissions: "0644"
-        contentFrom:
-          secret:
-            name: ${CLUSTER_NAME}-cloud-config
-            key: cacert
-    preRunCommands:
-      - update-ca-certificates
-    extraKubeAPIServerArgs:
-      "--cloud-provider": external
----
-apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-kind: OpenStackMachineTemplate
-metadata:
-  name: ${CLUSTER_NAME}-control-plane
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  template:
-    spec:
-      flavor: ${OPENSTACK_FLAVOR}
-      image:
-        filter:
-          name: ${OPENSTACK_IMAGE_NAME}
-      sshKeyName: ${OPENSTACK_SSH_KEY_NAME}
-      identityRef:
-        name: ${CLUSTER_NAME}-cloud-config
-        cloudName: ${OPENSTACK_CLOUD}
----
-apiVersion: cluster.x-k8s.io/v1beta1
-kind: MachineDeployment
-metadata:
-  name: ${CLUSTER_NAME}-md-0
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  clusterName: ${CLUSTER_NAME}
-  replicas: ${WORKER_MACHINE_COUNT}
-  selector:
-    matchLabels: {}
-  template:
-    spec:
-      clusterName: ${CLUSTER_NAME}
-      version: ${KUBERNETES_VERSION}
-      bootstrap:
-        configRef:
-          apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
-          kind: CK8sConfigTemplate
-          name: ${CLUSTER_NAME}-md-0
-      infrastructureRef:
-        apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-        kind: OpenStackMachineTemplate
-        name: ${CLUSTER_NAME}-md-0
----
-apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
-kind: OpenStackMachineTemplate
-metadata:
-  name: ${CLUSTER_NAME}-md-0
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  template:
-    spec:
-      flavor: ${OPENSTACK_FLAVOR}
-      image:
-        filter:
-          name: ${OPENSTACK_IMAGE_NAME}
-      sshKeyName: ${OPENSTACK_SSH_KEY_NAME}
-      identityRef:
-        name: ${CLUSTER_NAME}-cloud-config
-        cloudName: ${OPENSTACK_CLOUD}
----
-apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
-kind: CK8sConfigTemplate
-metadata:
-  name: ${CLUSTER_NAME}-md-0
-  namespace: ${CLUSTER_NAMESPACE}
-spec:
-  template:
-    spec:
-      files:
-        - path: /usr/local/share/ca-certificates/vault-ca.crt
-          owner: root:root
-          permissions: "0644"
-          contentFrom:
-            secret:
-              name: ${CLUSTER_NAME}-cloud-config
-              key: cacert
-      preRunCommands:
-        - update-ca-certificates
-TEMPLATE_EOF
-
-# envsubst to render
-envsubst < "$WORK/cluster-template.yaml" > "$WORK/cluster-rendered.yaml"
-
-# Validate as YAML
-python3 -c "import yaml; list(yaml.safe_load_all(open('$WORK/cluster-rendered.yaml'))); print('YAML OK')"
-
-# Quick visual check — no leftover ${...} markers
-grep -n '\${' "$WORK/cluster-rendered.yaml" || echo "No unsubstituted variables — good"
-```
-
-> **CK8sConfig field name caveat:** the exact field names (`files`, `preRunCommands`) and their `contentFrom.secret` schema are CK8s-version-dependent. If `clusterctl init` failed earlier with schema warnings, consult the CK8s release notes for the pinned `$CK8S_VERSION`.
-
----
-
-
-## 14. Apply + poll-to-Ready
-
-Transfer rendered template to capi-mgmt and apply:
-
-```bash
-scp "$WORK/cluster-rendered.yaml" ubuntu@$CAPI_MGMT_METAL_IP:/home/ubuntu/cluster.yaml
-
-ssh ubuntu@$CAPI_MGMT_METAL_IP <<'EOF'
-set -euo pipefail
-kubectl apply -f /home/ubuntu/cluster.yaml
-echo "Applied. Waiting for cluster Available status (15-min timeout)..."
-
-# Poll every 10 s (90 tries = 15 min) until the Cluster's Ready condition is True
-for i in $(seq 1 90); do
-  STATUS=$(kubectl get cluster capi-mgmt-cluster -o json 2>/dev/null \
-    | jq -r '.status.phase // "Unknown"')
-  READY=$(kubectl get cluster capi-mgmt-cluster -o json 2>/dev/null \
-    | jq -r '.status.conditions[]? | select(.type=="Ready") | .status' | head -1)
-  echo "$(date -Is) phase=$STATUS ready=$READY"
-  [ "$READY" = "True" ] && { echo "Cluster Ready"; break; }
-  sleep 10
-done
-[ "$READY" = "True" ] || echo "[FAIL] cluster not Ready after 15 min — stop and run the diagnosis commands before continuing"
-kubectl get cluster,machines,ck8scontrolplane,machinedeployment -A   # control plane is CK8s; no kubeadm CRDs are installed
-EOF
-```
-
-**If the poll times out before Ready,** typical diagnosis:
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl describe cluster capi-mgmt-cluster
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get machines -A
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl logs -n capo-system deployment/capo-controller-manager --tail=100
-```
-
-Common causes:
-
-- OpenStack API unreachable from capi-mgmt → check Vault CA install on capi-mgmt (§6)
-- Image / flavor / network ID wrong in cluster template → re-check the §11 resources and the §13 template variables
-- Security group rules block kube-api LB → CAPO usually handles this; check OpenStackCluster status
-- Application credential expired / wrong → re-check `$APP_CRED_ID`
-
----
-
-
-## 15. Extract workload kubeconfig
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- clusterctl get kubeconfig capi-mgmt-cluster \
-  > "$WORK/capi-mgmt-cluster.kubeconfig"
-chmod 600 "$WORK/capi-mgmt-cluster.kubeconfig"
-
-# Sanity-check the workload cluster is reachable
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get nodes
-# Expect: 1 control plane + 2 workers, all Ready
-```
-
-If `get nodes` times out, the cluster's API LB may not have allocated its external IP yet, or the firewall rules don't permit jumphost → workload API:
-
-```bash
-# What IP is the cluster's API LB on?
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get openstackcluster capi-mgmt-cluster \
-  -o json | jq '.status.externalNetwork, .status.controlPlaneEndpoint'
-
-# Test reachability
-curl -sk --max-time 10 "https://<API-IP>:6443/version" && echo " ← reachable" || echo "API LB unreachable"
-```
-
----
-
-
-## 16. `clusterctl init` on target (workload cluster)
-
-The workload cluster must have the same CAPI providers installed before `move`.
-
-```bash
-# Run from jumphost using the workload kubeconfig
-KUBECONFIG="$WORK/capi-mgmt-cluster.kubeconfig" clusterctl init \
-  --core "cluster-api:${CAPI_VERSION}" \
-  --infrastructure "openstack:${CAPO_VERSION}" \
-  --bootstrap "canonical-kubernetes:${CK8S_VERSION}" \
-  --control-plane "canonical-kubernetes:${CK8S_VERSION}" \
-  --cert-manager-version "${CERT_MANAGER_VERSION}"
-
-# ORC into workload cluster too
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" apply \
-  -f "https://github.com/k-orc/openstack-resource-controller/releases/${ORC_VERSION}/orc.yaml"
-
-# Wait for everything Available
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" wait \
-  --for=condition=Available --timeout=5m deployment --all -n capi-system
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" wait \
-  --for=condition=Available --timeout=5m deployment --all -n capo-system
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" wait \
-  --for=condition=Available --timeout=5m deployment --all -n cert-manager
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" wait \
-  --for=condition=Available --timeout=5m deployment --all -n orc-system
-```
-
-> **cert-manager double-install caveat:** if CK8s already installed cert-manager during workload bootstrap, the second `clusterctl init` may warn or skip. Check existing cert-manager version against `$CERT_MANAGER_VERSION` — if they differ, version-skew issues may surface post-pivot. Adjust the pin in §4 or accept the existing version. Roosevelt's standard practice is to install cert-manager via `clusterctl init` only (don't pre-install via CK8s) — same approach valid here if you want clean version control.
-
----
-
-
-## 17. `clusterctl move` pivot
-
-Move all CAPI CRs from bootstrap k3s → workload cluster:
-
-```bash
-# Stage the target kubeconfig on capi-mgmt (where clusterctl move runs)
-scp "$WORK/capi-mgmt-cluster.kubeconfig" ubuntu@$CAPI_MGMT_METAL_IP:/home/ubuntu/target.kubeconfig
-
-# Dry-run first to catch issues before commit
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- clusterctl move \
-  --to-kubeconfig=/home/ubuntu/target.kubeconfig \
-  --dry-run
-
-# Inspect dry-run output: list of objects to be moved. Should include:
-#   - Cluster, OpenStackCluster, OpenStackClusterTemplate
-#   - Secrets (cloud-config)
-#   - Machine objects, OpenStackMachineTemplate
-#   - CK8sControlPlane, CK8sConfigTemplate
-#   - MachineDeployment
-# Should NOT include cert-manager state (cert-manager manages its own state
-# on each cluster independently)
-```
-
-**If dry-run looks correct, execute the move:**
-
-```bash
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- clusterctl move \
-  --to-kubeconfig=/home/ubuntu/target.kubeconfig
-
-# Move can take several minutes. Output ends with: "moved successfully"
-```
-
----
-
-
-## 18. Post-pivot verification
-
-```bash
-echo "=== Bootstrap k3s (should now be empty of cluster CRs) ==="
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get cluster -A
-# Expect: No resources found (or only a header)
-
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get machines -A
-# Expect: No resources found
-
-ssh ubuntu@$CAPI_MGMT_METAL_IP -- kubectl get openstackcluster -A
-# Expect: No resources found
-
-echo ""
-echo "=== Workload cluster (should now own its own cluster CRs) ==="
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get cluster -A
-# Expect: capi-mgmt-cluster shown, phase=Provisioned
-
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get machines -A
-# Expect: 3 machines (1 control-plane + 2 workers), all Running
-
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get openstackcluster -A
-
-echo ""
-echo "=== CAPI controllers in workload ==="
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get pods -A \
-  | grep -E "(capi|capo|orc|cert-manager)" | grep -v "Running\|Completed"
-# Expect: empty (all controller pods Running)
-
-echo ""
-echo "=== OCCM not crash-looping (CRITICAL — main goal of TLS-verify work) ==="
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" get pods -n kube-system \
-  -l k8s-app=openstack-cloud-controller-manager
-# Expect: 1 pod Running, NOT CrashLoopBackOff
-
-kubectl --kubeconfig "$WORK/capi-mgmt-cluster.kubeconfig" logs -n kube-system \
-  -l k8s-app=openstack-cloud-controller-manager --tail=50 \
-  | grep -iE "(tls|cert|error)" | head -20
-# Expect: no TLS/cert errors; OCCM should be healthy
-```
-
-> **If OCCM crash-loops with "x509: certificate signed by unknown authority":** Vault CA distribution failed. Check (a) `/usr/local/share/ca-certificates/vault-ca.crt` exists on workload nodes; (b) `update-ca-certificates` ran (check `/etc/ssl/certs/ca-certificates.crt` for the Vault CA's subject); (c) the secret reference in CK8sConfigTemplate matched the secret name. SSH into a worker via the jumphost key (`ssh -i $WORK/capi-workload-key ubuntu@<worker-IP-via-FIP>`) to diagnose.
-
----
-
-
-## 19. Handoff to v1-do-doc-08
-
-The workload kubeconfig at `$WORK/capi-mgmt-cluster.kubeconfig` is the input to `v1-do-doc-08-magnum-driver.md`. Copy it to a stable path:
-
-```bash
-mkdir -p $HOME/magnum-capi
-cp "$WORK/capi-mgmt-cluster.kubeconfig" $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig
-chmod 600 $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig
-echo "Workload kubeconfig staged at: $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig"
-```
-
-> **Important — post-pivot semantic:** Magnum's `kubeconfig_file` setting (under `[capi_helm]` in `/etc/magnum/magnum.conf.d/99-capi.conf`, per D-007 corrected language) points to the workload cluster, not the bootstrap k3s. With pivot mandatory, Magnum's CAPI calls flow:
->
-> ```
-> Magnum/leader → workload cluster API → CAPI controllers (running in workload)
-> → create new Cluster CRs (tenant Magnum clusters)
-> ```
->
-> The bootstrap k3s on capi-mgmt is now disposable. For v1 testcloud, leave capi-mgmt running so its k3s can be inspected for diagnostics. Roosevelt may destroy capi-mgmt entirely at this point for cost savings.
-
----
-
-
-## 20. Acceptance criteria — go/no-go for v1-do-doc-08
-
-- [ ] §4 pins captured to `$DEPLOY_RECORD` and `KUBERNETES_VERSION` set
-- [ ] §5 capi-mgmt MAAS-deployed (status `Deployed`)
-- [ ] §6 Vault CA installed on capi-mgmt; `openssl s_client` against Keystone returns `Verify return code: 0 (ok)`
-- [ ] §7-10 k3s + CAPI controllers + ORC all Running
-- [ ] §11 cloud-side resources present (project, user, role assignments, app cred, keypair, image, flavor)
-- [ ] §13 cluster template renders with no unsubstituted `${...}` markers; YAML parses
-- [ ] §14 workload cluster Ready
-- [ ] §15 workload kubeconfig extracted; `kubectl get nodes` shows 3 nodes Ready
-- [ ] §16 workload cluster has CAPI providers installed
-- [ ] §17 `clusterctl move` reported "moved successfully"
-- [ ] §18 bootstrap k3s now empty; workload cluster owns Cluster/Machines/etc.; OCCM Running not CrashLoopBackOff
-- [ ] §19 workload kubeconfig staged at `$HOME/magnum-capi/capi-mgmt-cluster.kubeconfig`
-
-If all checked, proceed to `v1-do-doc-08-magnum-driver.md`.
-
----
-
-
-## 21. Roosevelt deltas (forward-look)
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Workload image | Default `noble-amd64` from cloud-images.ubuntu.com | Custom image baked with Vault CA pre-installed (no runtime install step) |
-| Vault CA distribution | CK8sConfig `files:` + `preRunCommands:` (this runbook) | Image-baked + CK8sConfig (defense in depth) |
-| App credential lifetime | No expiry set (testcloud) | Short-lived rotating credentials via Vault auth method |
-| Workload cluster control plane | 1 node | 3 nodes (HA) |
-| Workload cluster workers | 2 nodes | Per-tenant sizing; HPA-driven |
-| `clusterctl init --cert-manager-version` | Pin from §4 | Pin to Vault PKI cert-manager profile (separate Roosevelt prep) |
-| capi-mgmt VM lifecycle post-pivot | Kept running for diagnostics | Destroyed (cost savings; pivot makes it disposable) |
-| Version pinning record | `$HOME/deploy-records/<timestamp>/capi-pins/` | Same pattern, captured in Vault as audit artifact |
-| Authentication to GitHub API | Optional PAT | Mandatory PAT (avoid rate-limit during automated rebuilds) |
-
----
-
-
-## 22. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-22 | Original runbook 04a created. Vault CA distribution (no tls-insecure), mandatory `clusterctl move` pivot, pin-at-execution version model. | Workstream 3b |
-| 2026-05-27 | Adapted into v1-do-doc-07. Fixes: `$REPO` path; `$VAULT_CA` path; `$MAAS_PROFILE` set; §4 dynamic KUBERNETES_VERSION discovery; §5 MAAS poll exit converted to non-exiting [FAIL]; §11 noble-amd64 missing branch converted to [FAIL]; cross-references updated to v1-do-doc set. | Batch C drafting |
diff --git a/runbooks/v1-do-doc-08-magnum-driver.md b/runbooks/v1-do-doc-08-magnum-driver.md
deleted file mode 100644
index 6e8fb90..0000000
--- a/runbooks/v1-do-doc-08-magnum-driver.md
+++ /dev/null
@@ -1,534 +0,0 @@
-# v1 Do-Document 08 — Magnum CAPI Helm Driver Install
-
-**Status:** Third execution document of Batch C. Last document in the Magnum/CAPI stack. Grafts the CAPI Helm driver onto the deployed Magnum so `openstack coe cluster create` provisions tenant K8s clusters via the workload cluster's CAPI controllers (not via the deprecated Heat driver).
-
-**Position in sequence:** Runs after `v1-do-doc-07-capi-bootstrap.md` (workload cluster + kubeconfig staged at `$HOME/magnum-capi/capi-mgmt-cluster.kubeconfig`). Final document of Batch C. Followed by Batch D (tenant + DNS + validate).
-
-**Replaces:** `runbooks/05-magnum-capi-driver.md` — same substantive procedure with fixes applied. The old runbook moves to `runbooks/deprecated/` as part of this batch's commits.
-
-**Fixes applied vs the prior runbook (`runbooks/05-magnum-capi-driver.md`):**
-
-- §1 — removed the "Known doc inconsistency (tracked for cleanup)" notice about D-007. D-007's Layer B language was corrected in the 2026-05-22 change-log entry of `docs/design-decisions.md`; the notice is now obsolete.
-- Cross-references updated: "runbook 04" → v1-do-doc-06; kubeconfig source documented as staged by v1-do-doc-07 §19.
-- §11 optional smoketest `exit 1` on `CREATE_FAILED` converted to non-exiting `[FAIL]` report — the smoketest is optional and a failure should not kill the operator's shell.
-
-**Cross-references:**
-
-- D-007 Layer B (Magnum two-layer install)
-- D-017 (CAPI bootstrap cluster lifecycle)
-- v1-do-doc-07 §19 (workload kubeconfig handoff)
-- Workstream 3c decision (2026-05-22): magnum-capi-helm 1.1.0 from PyPI; workload-cluster kubeconfig (NOT bootstrap k3s)
-
----
-
-
-## 1. Purpose & scope
-
-Graft the CAPI Helm driver onto the Charmed Magnum deployment so that `openstack coe cluster create` provisions tenant K8s clusters via CAPI (in the workload cluster) instead of via the deprecated Heat driver.
-
-**Output of this runbook:**
-
-- `magnum-capi-helm==1.1.0` installed on the magnum unit's system Python.
-- `/etc/magnum/kubeconfig` populated with the workload cluster's kubeconfig (post-pivot CAPI controller plane).
-- `/etc/magnum/magnum.conf.d/99-capi.conf` configured with `enabled_drivers = k8s_capi_helm_v1` and `[capi_helm] kubeconfig_file=`.
-- Systemd overrides on `magnum-api` and `magnum-conductor` that replace the init.d wrapper's ExecStart with explicit `--config-dir` invocation.
-- Both services running cleanly with the CAPI driver loaded.
-
-**Scope:** v1 testcloud. Roosevelt deltas in §12.
-
-**Out of scope:**
-
-- Magnum domain setup (v1-do-doc-06)
-- Workload cluster lifecycle (v1-do-doc-07)
-- Smoketest tenant cluster creation is OPTIONAL (§11) — full validation framework belongs in v1-do-doc-11.
-
----
-
-
-## 2. Decisions captured
-
-| Decision | Choice | Reason |
-|---|---|---|
-| Driver pin | `magnum-capi-helm==1.1.0` from PyPI | D-007 correction (stackhpc fork archived Dec 2024; canonical project on opendev/PyPI; 1.1.0 is last Caracal-cycle release) |
-| Install method | `pip3 install --break-system-packages` | PEP 668 — Ubuntu 23.04+ (including 24.04 LTS) marks the system Python as externally managed, so pip requires this explicit override for a system-site-packages install |
-| Install scope | System Python on magnum unit (not venv) | Magnum charm uses system-packaged python at `/usr/lib/python3/dist-packages/magnum/`; driver must import from same site |
-| Kubeconfig target | Workload cluster (post-pivot) | Workstream 3b — bootstrap k3s is empty post-pivot; CAPI controllers live in workload |
-| Kubeconfig source | `$HOME/magnum-capi/capi-mgmt-cluster.kubeconfig` (staged by v1-do-doc-07 §19) | Documented handoff |
-| Driver entry-point name | `k8s_capi_helm_v1` | Per upstream magnum-capi-helm 1.1.0; verify in §5 |
-| Conf.d filename | `99-capi.conf` | Numeric prefix ensures it loads AFTER any charm-managed conf, so `enabled_drivers` override wins |
-| File encoding | ASCII-only | Non-ASCII in conf.d causes silent magnum daemon failures (handoff lesson; cf. Horizon `local_settings.d` issue) |
-| Trustee credential | Existing magnum-shared user (charm-managed) | Roosevelt will use app-credential pattern |
-
----
-
-
-## 3. Prerequisites
-
-| Prereq | Verification |
-|---|---|
-| Magnum charm active/idle | `juju status magnum \| grep magnum/0` shows `active idle` |
-| Magnum domain setup completed (v1-do-doc-06) | `( source $HOME/admin-openrc; openstack domain show magnum -f value -c enabled )` returns `True` |
-| Workload cluster reachable from jumphost | `kubectl --kubeconfig $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig get nodes` returns Ready nodes |
-| CAPI controllers running in workload cluster | `kubectl --kubeconfig $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig get pods -n capi-system \| grep -v Running \| grep -v NAME` returns no output |
-| Workload kubeconfig staged at expected path | `test -r $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig && stat -c %a $HOME/magnum-capi/capi-mgmt-cluster.kubeconfig` shows `600` |
-| `juju exec` works to magnum/leader (use exec, NOT ssh, for non-interactive — handoff lesson) | `juju exec --unit magnum/leader -- hostname` returns the unit hostname |
-
-**Set shell context:**
-
-```bash
-export WORK=$HOME/magnum-capi
-export WORKLOAD_KUBECONFIG=$WORK/capi-mgmt-cluster.kubeconfig
-export DRIVER_VERSION=magnum-capi-helm==1.1.0   # per D-007 correction
-cd "$WORK"
-```
-
-> **`juju ssh` vs `juju exec` choice:** the handoff lessons explicitly call out that `juju ssh` hangs when stdout is redirected (PTY allocation issue). This runbook uses `juju exec` for all non-interactive command execution and reserves `juju ssh` only for cases where you actually want an interactive shell.
-
----
-
-
-## 4. Pre-flight: capture current state
-
-Capture the magnum unit's state BEFORE making changes. Useful for diagnosis if anything goes wrong, and as a record of what was changed.
-
-```bash
-mkdir -p "$WORK/pre-state"
-
-# Service unit files (as managed by charm)
-juju exec --unit magnum/leader -- \
-  'sudo systemctl cat magnum-api magnum-conductor 2>&1' \
-  > "$WORK/pre-state/systemd-units.txt"
-
-# Currently-enabled drivers
-juju exec --unit magnum/leader -- \
-  'sudo grep -r enabled_drivers /etc/magnum/ 2>/dev/null || echo "(no enabled_drivers found — charm default applies)"' \
-  > "$WORK/pre-state/drivers-pre.txt"
-
-# Python site-packages — see what's already installed
-juju exec --unit magnum/leader -- \
-  'sudo pip3 list 2>/dev/null | grep -iE "magnum|cluster|helm|kubernetes" || true' \
-  > "$WORK/pre-state/pip-pre.txt"
-
-# conf.d state
-juju exec --unit magnum/leader -- \
-  'sudo ls -la /etc/magnum/magnum.conf.d/ 2>/dev/null || echo "(no conf.d directory)"' \
-  > "$WORK/pre-state/confd-pre.txt"
-
-# Service running state
-juju exec --unit magnum/leader -- \
-  'sudo systemctl is-active magnum-api magnum-conductor' \
-  > "$WORK/pre-state/service-state-pre.txt"
-
-# Display the captured state
-cat "$WORK/pre-state/"*.txt
-```
-
-> **What to look for in pre-state:** the charm-managed `enabled_drivers` value probably includes Heat-based drivers (`heat_kubernetes`, etc.). The 99-capi.conf override in §7 replaces this with the single CAPI driver. The pre-state capture documents what was active before the override took effect.
-
----
-
-
-## 5. Install magnum-capi-helm 1.1.0
-
-```bash
-juju exec --unit magnum/leader -- \
-  "sudo pip3 install $DRIVER_VERSION --break-system-packages"
-```
-
-**Verify install:**
-
-```bash
-juju exec --unit magnum/leader -- \
-  'sudo pip3 show magnum-capi-helm | head -10'
-# Expect: Name: magnum-capi-helm
-#         Version: 1.1.0
-#         Location: /usr/local/lib/python3.X/dist-packages (pip run as root on Ubuntu installs under /usr/local)
-
-juju exec --unit magnum/leader -- \
-  'sudo python3 -c "import magnum_capi_helm; print(magnum_capi_helm.__file__)"'
-# Expect: /usr/local/lib/python3.X/dist-packages/magnum_capi_helm/__init__.py
-```
-
-**Check that the driver entry point is registered:**
-
-```bash
-juju exec --unit magnum/leader -- \
-  'sudo python3 -c "
-from stevedore import driver
-mgr = driver.DriverManager(
-    namespace=\"magnum.drivers\",
-    name=\"k8s_capi_helm_v1\",
-    invoke_on_load=False
-)
-print(\"Driver class:\", mgr.driver)
-"'
-# Expect: Driver class: <class 'magnum_capi_helm.driver.Driver'>
-# (or similar — the actual class path is package-version-dependent)
-```
-
-> If the entry point check fails with "No 'k8s_capi_helm_v1' driver found", the driver name in 1.1.0 may differ from what D-007 documented. Inspect the installed package's `entry_points.txt`:
->
-> ```bash
-> juju exec --unit magnum/leader -- \
->   'sudo cat /usr/lib/python3/dist-packages/magnum_capi_helm*.dist-info/entry_points.txt 2>/dev/null'
-> ```
->
-> Find the entry under `[magnum.drivers]` — use that exact name in §7.
-
----
-
-
-## 6. Stage workload kubeconfig on magnum unit
-
-```bash
-# Transfer kubeconfig from jumphost to magnum unit
-juju scp "$WORKLOAD_KUBECONFIG" magnum/leader:/tmp/kubeconfig
-
-# Install with correct ownership/mode in one atomic step
-juju exec --unit magnum/leader -- \
-  'sudo install -m 0640 -o root -g magnum /tmp/kubeconfig /etc/magnum/kubeconfig && sudo rm /tmp/kubeconfig'
-```
-
-**Verify:**
-
-```bash
-juju exec --unit magnum/leader -- \
-  'sudo ls -la /etc/magnum/kubeconfig'
-# Expect: -rw-r----- 1 root magnum ... /etc/magnum/kubeconfig
-
-# Confirm magnum user can read it
-juju exec --unit magnum/leader -- \
-  'sudo -u magnum cat /etc/magnum/kubeconfig | head -3'
-# Expect: apiVersion: v1 / clusters: / - cluster:
-
-# Confirm kubectl can use it from the magnum unit (sanity check on API reachability)
-juju exec --unit magnum/leader -- \
-  'sudo -u magnum kubectl --kubeconfig /etc/magnum/kubeconfig get nodes 2>&1 | head -10'
-# Expect: NAME ... STATUS=Ready for control plane + workers
-# OR: kubectl not installed (acceptable — magnum-capi-helm uses Python client, not kubectl)
-```
-
-> **Why mode 0640 and group magnum:** kubeconfig contains auth tokens. Mode 0600 (owner-only, owner root) wouldn't let the `magnum` system user (which runs magnum-api/conductor) read it. Mode 0640 with `group: magnum` is the minimum-permission setup that works. Do NOT use 0644 — that would make the file readable by every other user on the unit.
-
----
-
-
-## 7. Configure `/etc/magnum/magnum.conf.d/99-capi.conf`
-
-Generate the conf locally first (keep paths under `$HOME` for consistency with snap confinement on other steps), then transfer.
-
-**ASCII-only verification is critical** — the handoff documents non-ASCII characters in `conf.d` files causing silent daemon failures (cf. Horizon `local_settings.d`). Use plain straight quotes, ASCII dashes, no smart typography.
-
-```bash
-# Write locally
-cat > "$WORK/99-capi.conf" <<'EOF'
-[DEFAULT]
-enabled_drivers = k8s_capi_helm_v1
-
-[capi_helm]
-kubeconfig_file = /etc/magnum/kubeconfig
-EOF
-
-# Verify it is pure ASCII (no UTF-8 sneakers)
-file "$WORK/99-capi.conf"
-# Expect: ASCII text
-# If it says "UTF-8 Unicode text", STOP and rewrite by hand — even one stray
-# em-dash or smart quote will silently break magnum
-
-# Hex dump check (paranoid mode)
-xxd "$WORK/99-capi.conf" | grep -v "^[0-9a-f]*: [0-9a-f ]*  [a-zA-Z0-9 \[\]=._/]*$" | head -5
-# Expect: empty output (all bytes are printable ASCII)
-```
-
-**Stage and install:**
-
-```bash
-juju scp "$WORK/99-capi.conf" magnum/leader:/tmp/99-capi.conf
-
-juju exec --unit magnum/leader -- \
-  'sudo mkdir -p /etc/magnum/magnum.conf.d && sudo install -m 0644 -o root -g root /tmp/99-capi.conf /etc/magnum/magnum.conf.d/99-capi.conf && sudo rm /tmp/99-capi.conf'
-
-# Verify
-juju exec --unit magnum/leader -- \
-  'sudo ls -la /etc/magnum/magnum.conf.d/ && sudo cat /etc/magnum/magnum.conf.d/99-capi.conf'
-# Expect: file listed; content matches what was written
-```
-
----
-
-
-## 8. Systemd override on magnum-api + magnum-conductor
-
-The Charmed Magnum unit files use a wrapper pattern:
-
-```
-ExecStart=/etc/init.d/magnum-api systemd-start
-```
-
-The wrapper does NOT pass `--config-dir` to magnum-api, so `/etc/magnum/magnum.conf.d/` is never loaded. The 99-capi.conf would have no effect.
-
-Override with explicit `--config-file` + `--config-dir` invocation.
-
-**Generate override files locally:**
-
-```bash
-cat > "$WORK/magnum-api-override.conf" <<'EOF'
-[Service]
-ExecStart=
-ExecStart=/usr/bin/magnum-api --config-file=/etc/magnum/magnum.conf --config-dir=/etc/magnum/magnum.conf.d
-EOF
-
-cat > "$WORK/magnum-conductor-override.conf" <<'EOF'
-[Service]
-ExecStart=
-ExecStart=/usr/bin/magnum-conductor --config-file=/etc/magnum/magnum.conf --config-dir=/etc/magnum/magnum.conf.d
-EOF
-
-# ASCII check
-file "$WORK/magnum-api-override.conf" "$WORK/magnum-conductor-override.conf"
-# Expect: ASCII text x2
-```
-
-> **The empty `ExecStart=` line is critical.** Systemd accumulates ExecStart directives by default; an empty assignment is required to CLEAR the inherited directive before setting the replacement. Without the empty line, the unit would have BOTH the init.d wrapper AND the new direct invocation, and would likely fail to start.
-
-**Install on the unit:**
-
-```bash
-juju scp "$WORK/magnum-api-override.conf" magnum/leader:/tmp/magnum-api-override.conf
-juju scp "$WORK/magnum-conductor-override.conf" magnum/leader:/tmp/magnum-conductor-override.conf
-
-juju exec --unit magnum/leader -- \
-  'sudo mkdir -p /etc/systemd/system/magnum-api.service.d /etc/systemd/system/magnum-conductor.service.d && \
-   sudo install -m 0644 -o root -g root /tmp/magnum-api-override.conf /etc/systemd/system/magnum-api.service.d/override.conf && \
-   sudo install -m 0644 -o root -g root /tmp/magnum-conductor-override.conf /etc/systemd/system/magnum-conductor.service.d/override.conf && \
-   sudo rm /tmp/magnum-api-override.conf /tmp/magnum-conductor-override.conf'
-
-# Reload systemd to pick up the overrides
-juju exec --unit magnum/leader -- 'sudo systemctl daemon-reload'
-
-# Verify the overrides are effective (systemctl cat shows combined unit + overrides)
-juju exec --unit magnum/leader -- 'sudo systemctl cat magnum-api | grep -A1 ExecStart'
-# Expect: the charm's original ExecStart= line from the base unit, followed in the drop-in by the empty clear-line and the new /usr/bin/magnum-api invocation
-juju exec --unit magnum/leader -- 'sudo systemctl cat magnum-conductor | grep -A1 ExecStart'
-# Expect: the original ExecStart= line from the base unit, followed in the drop-in by the empty clear-line and the new /usr/bin/magnum-conductor invocation
-```
-
-> **Charm reconciliation note:** the Magnum charm may rewrite its own systemd units on config changes or upgrades. The drop-in override at `/etc/systemd/system/magnum-api.service.d/override.conf` is OUTSIDE the charm's writable zone and should survive. Verify after any `juju refresh` or `juju config magnum` command by re-running the `systemctl cat` check above.
-
----
-
-
-## 9. Restart services + verify health
-
-```bash
-juju exec --unit magnum/leader -- \
-  'sudo systemctl restart magnum-api magnum-conductor'
-
-# Wait briefly for services to initialize
-sleep 5
-
-# Check active state
-juju exec --unit magnum/leader -- \
-  'sudo systemctl is-active magnum-api magnum-conductor'
-# Expect: active (x2)
-
-# Examine recent journal for errors (the critical step — magnum's silent failure
-# mode means we must read logs, not just trust is-active)
-juju exec --unit magnum/leader -- \
-  'sudo journalctl -u magnum-api --since "2 minutes ago" --no-pager | tail -50'
-juju exec --unit magnum/leader -- \
-  'sudo journalctl -u magnum-conductor --since "2 minutes ago" --no-pager | tail -50'
-```
-
-**Look for these red flags in the logs:**
-
-| Symptom | Likely cause | Remediation |
-|---|---|---|
-| `ImportError: No module named magnum_capi_helm` | §5 pip install failed | Re-run §5; check pip3 output |
-| `EntryPointError: No 'k8s_capi_helm_v1' driver` | Driver entry-point name mismatch | Verify name per §5 footnote; update §7 |
-| Service repeatedly restarts (look for "Started" appearing twice in 10s) | Likely a config error in 99-capi.conf | Re-check ASCII-only; check magnum.conf.d permissions |
-| `kubeconfig_file` not honored | --config-dir not being passed | §8 override not active; re-run `systemctl daemon-reload` |
-| Silent: no error but driver also not loading | Non-ASCII char snuck into a conf | `file /etc/magnum/magnum.conf.d/99-capi.conf` — if it says UTF-8, regenerate |
-
----
-
-
-## 10. CAPI driver enablement check
-
-Verify the driver is actually loaded by Magnum and reachable via the API.
-
-```bash
-source $HOME/admin-openrc
-
-# List supported COE drivers via the Magnum API
-openstack coe cluster template list -f json
-# (empty templates list is fine — we are checking the endpoint responds)
-
-# Direct check on the unit: scan the service's loaded drivers
-juju exec --unit magnum/leader -- \
-  'sudo journalctl -u magnum-conductor --since "5 minutes ago" --no-pager | grep -iE "driver|enabled" | head -20'
-# Expect: a line mentioning k8s_capi_helm_v1 having been loaded
-# (Magnum logs the loaded drivers at startup)
-
-# Definitive check: try creating a cluster template that requires the CAPI driver
-openstack coe cluster template create magnum-capi-driver-check \
-  --image noble-amd64 \
-  --keypair capi-workload-key \
-  --external-network ext_net \
-  --master-flavor capi-mgmt-node \
-  --flavor capi-mgmt-node \
-  --coe kubernetes \
-  --network-driver calico \
-  --labels kube_tag=$KUBERNETES_VERSION
-
-openstack coe cluster template show magnum-capi-driver-check -c name -c coe -c labels
-```
-
-> **If template create fails with "driver not enabled" or similar:** the Magnum API process is not loading the conf.d. Verify the systemd override took effect — `sudo systemctl show magnum-api -p ExecStart` on the unit should show the explicit `--config-dir` invocation. If it still shows the init.d wrapper, the daemon-reload + restart did not pick up the override.
-
-> **`$KUBERNETES_VERSION` carry-over:** if your shell session no longer has `$KUBERNETES_VERSION` set from v1-do-doc-07 §4, re-read it from `$HOME/capi-bootstrap/pins/KUBERNETES_VERSION` or substitute the actual version in the `--labels kube_tag=` flag.
-
-**Cleanup the driver-check template:**
-
-```bash
-openstack coe cluster template delete magnum-capi-driver-check
-```
-
----
-
-
-## 11. Optional smoketest — create a tenant CAPI cluster
-
-This step is **optional**. Full validation belongs in v1-do-doc-11. Use this smoketest only if you want immediate confirmation that the entire chain (Magnum API → conductor → magnum-capi-helm → CAPI controllers in workload cluster → tenant K8s cluster on tenant VMs) works end-to-end.
-
-```bash
-# Create a cluster template tuned for testcloud smoketest
-openstack coe cluster template create magnum-smoketest-template \
-  --image noble-amd64 \
-  --keypair capi-workload-key \
-  --external-network ext_net \
-  --master-flavor capi-mgmt-node \
-  --flavor capi-mgmt-node \
-  --coe kubernetes \
-  --network-driver calico \
-  --labels boot_volume_size=20,kube_tag=$KUBERNETES_VERSION,octavia_provider=ovn
-
-# Create a 1+1 cluster (minimum for smoketest)
-openstack coe cluster create magnum-smoketest \
-  --cluster-template magnum-smoketest-template \
-  --master-count 1 \
-  --node-count 1
-
-# Poll for status (15-20 min typical; CAPI provisions tenant VMs end-to-end)
-SMOKETEST_RESULT=""
-for i in $(seq 1 60); do
-  STATUS=$(openstack coe cluster show magnum-smoketest -c status -f value 2>/dev/null)
-  echo "$(date -Is) status=$STATUS"
-  case "$STATUS" in
-    CREATE_COMPLETE)
-      echo "[OK] Smoketest passed"
-      SMOKETEST_RESULT="pass"
-      break
-      ;;
-    CREATE_FAILED)
-      echo "[FAIL] Smoketest cluster creation failed. Investigate via:"
-      echo "       openstack coe cluster show magnum-smoketest"
-      echo "       openstack stack list  # if any Heat stack remained"
-      echo "       kubectl --kubeconfig \$HOME/magnum-capi/capi-mgmt-cluster.kubeconfig get cluster,machines -A"
-      echo "       juju exec --unit magnum/leader -- sudo journalctl -u magnum-conductor --since '30 minutes ago'"
-      SMOKETEST_RESULT="fail"
-      break
-      ;;
-  esac
-  sleep 30
-done
-
-if [ "$SMOKETEST_RESULT" = "fail" ]; then
-  echo ""
-  echo "[FAIL] Smoketest did not complete cleanly. Stop here, investigate, and decide whether to proceed."
-  echo "       The smoketest cluster may need manual cleanup — see cleanup block below if you want to remove it."
-elif [ -z "$SMOKETEST_RESULT" ]; then
-  echo "[WARN] Smoketest poll timed out without reaching a terminal state. Cluster may still be provisioning."
-  echo "       Manually check with: openstack coe cluster show magnum-smoketest"
-fi
-```
-
-**If the smoketest reached `CREATE_COMPLETE`:**
-
-```bash
-# Retrieve the smoketest cluster's kubeconfig
-openstack coe cluster config magnum-smoketest --dir "$WORK/smoketest-kubeconfig"
-
-# Sanity-check the smoketest cluster
-KUBECONFIG="$WORK/smoketest-kubeconfig/config" kubectl get nodes
-KUBECONFIG="$WORK/smoketest-kubeconfig/config" kubectl get pods -A | head -20
-```
-
-**Cleanup the smoketest cluster (regardless of pass/fail):**
-
-```bash
-openstack coe cluster delete magnum-smoketest 2>/dev/null || echo "(cluster may already be deleting)"
-openstack coe cluster template delete magnum-smoketest-template 2>/dev/null || echo "(template may already be deleted)"
-```
-
-> **What success looks like:** the CAPI controllers in the workload cluster receive the new Cluster CR (created by magnum-capi-helm in response to the Magnum API call), CAPO talks to OpenStack to provision tenant VMs, the tenant VMs join the new K8s cluster, and the new cluster has 1 control plane + 1 worker Ready. Octavia provides the API server LB (visible as a Floating IP in the tenant project).
-
----
-
-
-## 12. Roosevelt deltas (forward-look)
-
-| Aspect | Testcloud (v1) | Roosevelt |
-|---|---|---|
-| Driver pin source | PyPI `magnum-capi-helm==1.1.0` | Internal mirror with checksum verification |
-| Driver pin record | Implicit in this runbook | Captured in Vault as audit artifact alongside CAPI pins |
-| Kubeconfig source | Workload cluster (post-pivot per v1-do-doc-07 §17) | Same |
-| Kubeconfig rotation | Manual on capi-mgmt rebuild | Automated when workload cluster cert rotates |
-| Trustee credential | Charm-default magnum-shared user | Per-tenant app credentials via Vault auth method |
-| Magnum HA | num_units=1 (per D-009 testcloud) | num_units=3 with hacluster + provider VIP |
-| Driver upgrade discipline | Manual re-run of §5 | Tracked maintenance window; Vault audit log |
-| Systemd override | Drop-in at `/etc/systemd/system/magnum-*.service.d/override.conf` | Same — but provided via a charm overlay package, not manual file install |
-| ASCII-only enforcement | Manual check (§7, §8) | Pre-flight lint in `scripts/pre-flight-checks.sh` |
-
----
-
-
-## 13. Documented runtime gotchas (carry-forward from handoff)
-
-These gotchas burned cycles during the Bobcat Magnum CAPI work. Each is explicitly handled in this runbook; collecting them here for visibility:
-
-1. **PEP 668 `--break-system-packages`** (§5). Ubuntu 23.04+ (including 24.04 LTS) refuses `pip install` against system Python by default. The flag is required for the magnum-capi-helm install path used by Charmed Magnum.
-2. **`juju ssh` hangs on stdout redirect.** PTY allocation issue. This runbook uses `juju exec` for all non-interactive command execution.
-3. **Heredoc nesting in `juju ssh` is fragile.** This runbook writes conf files locally first and uses `juju scp` + `juju exec install` to transfer — single-level only.
-4. **Non-ASCII characters in `conf.d` files cause silent daemon failures.** §7 and §8 both include `file <path>` ASCII verification before transfer.
-5. **`openstack -f value -c X -c Y` outputs in alphabetical field order, not flag order.** This runbook uses single-column queries or `-f json | jq` throughout.
-6. **Charm-managed `enabled_drivers` is overridden, not appended.** The `enabled_drivers = k8s_capi_helm_v1` line in 99-capi.conf REPLACES the charm-default value (which would include the deprecated Heat drivers).
-7. **The systemd override empty `ExecStart=` line is required** to clear the inherited ExecStart before setting the replacement (§8).
-8. **Snap-confined `openstack` CLI cannot read `/tmp`.** This runbook stages files under `$WORK=$HOME/magnum-capi`. The smoketest in §11 also writes to `$WORK/smoketest-kubeconfig`.
-
----
-
-
-## 14. Acceptance criteria — go/no-go for Batch D
-
-- [ ] §4 pre-state captured to `$WORK/pre-state/`
-- [ ] §5 `magnum-capi-helm` installed; `pip3 show magnum-capi-helm` shows version 1.1.0 from `/usr/local/lib/python3.X/dist-packages` (pip run as root on Ubuntu installs under `/usr/local`)
-- [ ] §5 driver entry point check returns the driver class (or operator confirmed the actual entry-point name in 1.1.0)
-- [ ] §6 `/etc/magnum/kubeconfig` present with mode 0640 root:magnum; magnum user can read it
-- [ ] §7 `/etc/magnum/magnum.conf.d/99-capi.conf` present; `file` reports ASCII text
-- [ ] §8 systemd overrides at `/etc/systemd/system/magnum-*.service.d/override.conf`; `systemctl cat` shows the drop-in's empty `ExecStart=` clear-line followed by the explicit `--config-dir` invocation for both units
-- [ ] §9 magnum-api and magnum-conductor active; no errors in recent journal
-- [ ] §10 `magnum-capi-driver-check` template creation succeeded; deleted after
-- [ ] §11 OPTIONAL — smoketest cluster reached `CREATE_COMPLETE` and was cleaned up (or skipped intentionally)
-
-If all required (non-optional) items checked, Batch C is complete. Proceed to Batch D (`v1-do-doc-09-tenant.md` first).
-
----
-
-
-## 15. Change log
-
-| Date | Change | Reference |
-|---|---|---|
-| 2026-05-22 | Original runbook 05 created. magnum-capi-helm 1.1.0 from PyPI; workload-cluster kubeconfig (post-pivot per workstream 3b); systemd override pattern; ASCII-only conf.d. | Workstream 3c |
-| 2026-05-27 | Adapted into v1-do-doc-08. Fixes: §1 obsolete D-007 inconsistency notice removed; cross-references updated to v1-do-doc set; §11 smoketest `exit 1` converted to non-exiting `[FAIL]` report (smoketest is optional and should not kill the operator shell). | Batch C drafting |
diff --git a/scripts/review-bundle.py b/scripts/review-bundle.py
new file mode 100644
index 0000000..665b1fa
--- /dev/null
+++ b/scripts/review-bundle.py
@@ -0,0 +1,546 @@
+#!/usr/bin/env python3
+"""
+review-bundle.py -- comprehensive pre-deploy review of the Charmed OpenStack
+Caracal 2024.1 IPv4-only bundle (VR0 / DC0 / Omega test cloud).
+
+READ-ONLY. Encodes every lesson learned from the 2026-05-28/29/30 deploy
+sessions as a fail-closed check. Superset of audit-bundle-fixes.py.
+
+Severities:
+  FAIL  deploy-blocker or known regression       -> exit 1
+  WARN  review item / possible issue             -> exit 1 only under --strict
+  INFO  informational summary                    -> never affects exit
+
+Dependencies: PyYAML only (already used by the existing fix scripts); rest stdlib.
+ASCII-only output by design (non-ASCII has caused silent daemon failures here).
+
+Usage:
+  python3 review-bundle.py [BUNDLE] [--strict] [--quiet]
+    BUNDLE   path to bundle.yaml (default: ./bundle.yaml); missing or unparseable -> exit 2
+    --strict treat WARN as failing for exit code
+    --quiet  suppress PASS/INFO lines (show only WARN/FAIL)
+"""
+
+import sys
+import argparse
+import ipaddress
+
+try:
+    import yaml
+except ImportError:
+    sys.stderr.write("ERROR: PyYAML not installed (pip install pyyaml --break-system-packages)\n")
+    sys.exit(2)
+
+# --------------------------------------------------------------------------- #
+# Config -- the known-good baseline. Adjust here if the design changes.
+# --------------------------------------------------------------------------- #
+EXPECTED_APPS = 51
+EXPECTED_RELATIONS = 98
+
+PROVIDER_NET = ipaddress.ip_network("10.12.4.0/22")
+METAL_NET = ipaddress.ip_network("10.12.8.0/22")
+VIP_OCTET_MIN = 224          # MAAS reserved metal VIP range 10.12.8.224-254 (D-020)
+VIP_OCTET_MAX = 254
+
+# BUNDLEFIX-001: the 7 per-endpoint binding keys that were phantom and removed.
+# Final anchors are {"":metal} and {"":metal, public:provider} -> none of these
+# should reappear in any app's effective bindings.
+PHANTOM_BINDING_KEYS = {
+    "admin", "internal", "shared-db", "amqp", "certificates", "cluster", "ha",
+}
+
+# D-020 clustered-API charm -> provider VIP last octet (metal mirrors it).
+EXPECTED_CLUSTERED = {
+    "barbican": 224, "cinder": 226, "glance": 228, "keystone": 229,
+    "magnum": 230, "neutron-api": 231, "nova-cloud-controller": 232,
+    "octavia": 233, "openstack-dashboard": 234, "placement": 235,
+}
+
+# Verified Caracal channel matrix (from prior charmhub verification).
+# WARN-only: channels can be intentionally pinned; flag deviation, do not block.
+OPENSTACK_CORE_CHANNEL = "2024.1/stable"
+OPENSTACK_CORE_CHARMS = {
+    "keystone", "glance", "cinder", "cinder-ceph", "nova-cloud-controller",
+    "nova-compute", "neutron-api", "neutron-api-plugin-ovn", "placement",
+    "octavia", "barbican", "magnum", "magnum-dashboard", "openstack-dashboard",
+}
+CHANNEL_MATRIX = {
+    "ovn-central": "24.03/stable", "ovn-chassis": "24.03/stable",
+    "ceph-mon": "squid/stable", "ceph-osd": "squid/stable",
+    "ceph-fs": "squid/stable", "ceph-radosgw": "squid/stable",
+    "mysql-innodb-cluster": "8.0/stable", "mysql-router": "8.0/stable",
+    "rabbitmq-server": "3.9/stable", "vault": "1.8/stable",
+}
+EXPECTED_BASE = "ubuntu@22.04"   # jammy; Caracal-bundle paradigm (not noble)
+
+MAC_RE = None  # compiled below
+import re
+MAC_RE = re.compile(r"([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}")
+
+# --------------------------------------------------------------------------- #
+# Duplicate-key-detecting YAML loader (PyYAML silently keeps the last dup).
+# --------------------------------------------------------------------------- #
+_DUP_KEYS = []
+
+
+class DupKeyLoader(yaml.SafeLoader):  # SafeLoader that also records repeated mapping keys in _DUP_KEYS
+    def construct_mapping(self, node, deep=False):
+        seen = set()  # keys already met in this one mapping node
+        for key_node, _ in node.value:
+            try:
+                key = self.construct_object(key_node, deep=deep)
+            except Exception:
+                continue  # key not constructible here; skipped for duplicate detection only
+            if isinstance(key, (str, int, float, bool)) or key is None:
+                if key in seen:
+                    _DUP_KEYS.append((str(key), key_node.start_mark.line + 1))  # presumably 0-based mark -> 1-based line; confirm
+                seen.add(key)
+        return super().construct_mapping(node, deep)  # the actual mapping is still built by SafeLoader
+
+
+# --------------------------------------------------------------------------- #
+# Reporter
+# --------------------------------------------------------------------------- #
+class Reporter:
+    """Accumulates (section, level, code, msg) findings plus per-level tallies; emit() prints them by section."""
+    def __init__(self, quiet=False):
+        self.quiet = quiet
+        self.rows = []  # findings, in insertion order
+        self.counts = dict.fromkeys(("PASS", "WARN", "FAIL", "INFO"), 0)
+
+    def add(self, section, level, code, msg):
+        self.rows.append((section, level, code, msg))
+        self.counts[level] = self.counts.get(level, 0) + 1
+
+    def emit(self):
+        current = None
+        for sec, level, code, msg in self.rows:
+            if self.quiet and level in ("PASS", "INFO"):
+                continue
+            if current != sec:
+                current = sec
+                print("\n--- %s ---" % sec)
+            print("  [%-4s] %-10s %s" % (level, code, msg))
+        print("\n==================== SUMMARY ====================")
+        totals = tuple(self.counts[k] for k in ("PASS", "WARN", "FAIL", "INFO"))
+        print("  PASS=%d  WARN=%d  FAIL=%d  INFO=%d" % totals)
+
+
+# --------------------------------------------------------------------------- #
+# Helpers
+# --------------------------------------------------------------------------- #
+def ep_app(endpoint):
+    """Application half of an 'app:endpoint' string ('keystone:shared-db' -> 'keystone'); None if not a str."""
+    if isinstance(endpoint, str):
+        return endpoint.partition(":")[0]
+    return None
+
+
+def in_net(addr, net):  # True when addr parses as an IP address contained in net
+    try:
+        return ipaddress.ip_address(addr) in net
+    except ValueError:  # addr is not a valid IP literal
+        return False
+
+
+# --------------------------------------------------------------------------- #
+# Checks
+# --------------------------------------------------------------------------- #
+def check_ascii(R, text):
+    """WARN once per line holding a non-ASCII char (first offender shown), capped at 20 rows; PASS if none."""
+    sec = "0. Structure / integrity"
+    bad = []
+    for i, line in enumerate(text.splitlines(), 1):
+        first = next((ch for ch in line if ord(ch) > 127), None)
+        if first is not None:
+            bad.append((i, repr(first)))
+    if not bad:
+        R.add(sec, "PASS", "ASCII", "file is pure ASCII")
+        return
+    for ln, ch in bad[:20]:
+        R.add(sec, "WARN", "NON-ASCII",
+              "non-ASCII char %s on line %d (non-ASCII has caused silent daemon failures here)" % (ch, ln))
+    if len(bad) > 20:
+        R.add(sec, "WARN", "NON-ASCII", "...and %d more non-ASCII line(s)" % (len(bad) - 20))
+
+
+def check_structure(R, doc):
+    sec = "0. Structure / integrity"
+    if not isinstance(doc, dict):
+        R.add(sec, "FAIL", "STRUCT-00", "top-level YAML is not a mapping")
+        return None, None
+    if _DUP_KEYS:
+        for k, ln in _DUP_KEYS:
+            R.add(sec, "FAIL", "DUPKEY", "duplicate key '%s' near line %d" % (k, ln))
+    else:
+        R.add(sec, "PASS", "DUPKEY", "no duplicate keys")
+
+    apps = doc.get("applications")
+    rels = doc.get("relations")
+    if not isinstance(apps, dict):
+        R.add(sec, "FAIL", "STRUCT-APPS", "no 'applications' mapping")
+        apps = {}
+    if not isinstance(rels, list):
+        R.add(sec, "FAIL", "STRUCT-RELS", "no 'relations' list")
+        rels = []
+
+    na, nr = len(apps), len(rels)
+    R.add(sec, "INFO" if na == EXPECTED_APPS else "WARN", "APP-COUNT",
+          "applications=%d (baseline %d)" % (na, EXPECTED_APPS))
+    R.add(sec, "INFO" if nr == EXPECTED_RELATIONS else "WARN", "REL-COUNT",
+          "relations=%d (baseline %d)" % (nr, EXPECTED_RELATIONS))
+    return apps, rels
+
+
+def check_relations(R, apps, rels):
+    sec = "1. Relation integrity"
+    bad_shape = miss_colon = dangling = 0
+    for r in rels:
+        if not (isinstance(r, list) and len(r) == 2):
+            R.add(sec, "FAIL", "REL-SHAPE", "relation not a 2-element list: %r" % (r,))
+            bad_shape += 1
+            continue
+        for e in r:
+            if not isinstance(e, str) or ":" not in e:
+                R.add(sec, "FAIL", "REL-COLON", "endpoint missing colon: %r in %r" % (e, r))
+                miss_colon += 1
+            else:
+                a = ep_app(e)
+                if a not in apps:
+                    R.add(sec, "FAIL", "REL-DANGLE",
+                          "endpoint references unknown app '%s' in %r" % (a, r))
+                    dangling += 1
+    if not (bad_shape or miss_colon or dangling):
+        R.add(sec, "PASS", "REL-INT",
+              "all relations well-formed, colon-explicit, both ends resolve to apps")
+
+
+def check_bindings_phantom(R, apps):
+    sec = "2. BUNDLEFIX-001 (phantom binding keys)"
+    hits = 0
+    for name, spec in apps.items():
+        b = (spec or {}).get("bindings")
+        if not isinstance(b, dict):
+            continue
+        bad = sorted(set(b.keys()) & PHANTOM_BINDING_KEYS)
+        if bad:
+            R.add(sec, "FAIL", "PHANTOM",
+                  "%s has phantom per-endpoint binding key(s): %s" % (name, ", ".join(bad)))
+            hits += 1
+    if not hits:
+        R.add(sec, "PASS", "PHANTOM",
+              "no app reintroduces a removed phantom binding key (%s)"
+              % ", ".join(sorted(PHANTOM_BINDING_KEYS)))
+
+
+def check_vault(R, apps, rels):
+    sec = "3. BUNDLEFIX-002 (vault de-HA)"
+    v = apps.get("vault")
+    if v is None:
+        R.add(sec, "WARN", "VAULT", "no 'vault' app found")
+        return
+    opts = (v or {}).get("options") or {}
+    if "vip" in opts:
+        R.add(sec, "FAIL", "VAULT-VIP", "vault has a 'vip' option (must be de-HA'd): %r" % opts["vip"])
+    else:
+        R.add(sec, "PASS", "VAULT-VIP", "vault has no vip")
+    if "os-public-hostname" in opts:
+        R.add(sec, "WARN", "VAULT-HOST", "vault has os-public-hostname (expected removed)")
+    if "vault-hacluster" in apps:
+        R.add(sec, "FAIL", "VAULT-HA", "vault-hacluster application is present (must be removed)")
+    else:
+        R.add(sec, "PASS", "VAULT-HA", "no vault-hacluster application")
+    for r in rels:
+        if isinstance(r, list) and any(isinstance(e, str) and e.startswith("vault:ha") for e in r):
+            R.add(sec, "FAIL", "VAULT-HAREL", "vault:ha relation present: %r" % (r,))
+
+
+def map_hacluster(apps, rels):
+    """Return (hacluster app names, {principal: hacluster app}); any 2-element relation pairing the two counts -- the endpoint name is not checked."""
+    hac_apps = {n for n, s in apps.items() if (s or {}).get("charm") == "hacluster"}
+    principal_of = {}
+    for r in rels:
+        if not (isinstance(r, list) and len(r) == 2):
+            continue  # malformed relations are ignored here
+        a0, a1 = ep_app(r[0]), ep_app(r[1])
+        if a0 in hac_apps and a1 and a1 not in hac_apps:
+            principal_of[a1] = a0
+        elif a1 in hac_apps and a0 and a0 not in hac_apps:
+            principal_of[a0] = a1
+    return hac_apps, principal_of
+
+
+def check_hacluster(R, apps, rels):
+    """Check each hacluster app's cluster_count; a non-numeric value FAILs instead of raising TypeError. Returns principal_of."""
+    sec = "4. BUNDLEFIX-003 (hacluster cluster_count)"
+    hac_apps, principal_of = map_hacluster(apps, rels)
+    if not hac_apps:
+        R.add(sec, "WARN", "HAC", "no hacluster apps found")
+        return principal_of
+    principal_for_hac = {h: p for p, h in principal_of.items()}
+    ok = 0
+    for h in sorted(hac_apps):
+        cc = (apps[h].get("options") or {}).get("cluster_count")
+        prin = principal_for_hac.get(h)
+        nu = (apps.get(prin, {}) or {}).get("num_units") if prin else None
+        if cc is None or not isinstance(cc, (int, float)):
+            why = "missing cluster_count" if cc is None else "cluster_count=%r is not a number" % (cc,)
+            R.add(sec, "FAIL", "HAC-CC", "%s %s" % (h, why))
+            continue
+        if not prin:
+            R.add(sec, "WARN", "HAC-PRIN", "%s has no principal via :ha relation" % h)
+        if isinstance(nu, int) and cc > nu:
+            R.add(sec, "FAIL", "HAC-OVER", "%s cluster_count=%s > principal %s num_units=%s" % (h, cc, prin, nu))
+            continue
+        if cc != 1:
+            R.add(sec, "WARN", "HAC-NE1", "%s cluster_count=%s (testcloud baseline is 1)" % (h, cc))
+        else:
+            ok += 1
+    if ok:
+        R.add(sec, "PASS", "HAC", "%d hacluster app(s) cluster_count=1 and <= principal num_units" % ok)
+    return principal_of
+
+
+def check_memcached(R, apps, rels):
+    """Require a 'memcached' app and a nova-cloud-controller:memcache <-> memcached:cache relation (either order)."""
+    sec = "5. BUNDLEFIX-004 (memcached)"
+    if "memcached" in apps:
+        R.add(sec, "PASS", "MEMCACHE-APP", "memcached application present")
+    else:
+        R.add(sec, "FAIL", "MEMCACHE-APP", "no 'memcached' application")
+    wanted = {"nova-cloud-controller:memcache", "memcached:cache"}
+    found = any(
+        isinstance(r, list) and len(r) == 2
+        and wanted <= {e for e in r if isinstance(e, str)}
+        for r in rels
+    )
+    if found:
+        R.add(sec, "PASS", "MEMCACHE-REL",
+              "nova-cloud-controller:memcache <-> memcached:cache relation present")
+    else:
+        R.add(sec, "FAIL", "MEMCACHE-REL",
+              "nova-cloud-controller:memcache <-> memcached:cache relation MISSING")
+
+
+def check_router_bindings(R, apps):
+    sec = "6. BUNDLEFIX-005 (mysql-router metal binding)"
+    routers = [n for n, s in apps.items() if (s or {}).get("charm") == "mysql-router"]
+    if not routers:
+        R.add(sec, "WARN", "ROUTER", "no mysql-router apps found")
+        return
+    bad = 0
+    for n in sorted(routers):
+        b = (apps[n].get("bindings") or {})
+        # effective default space is the "" key; anchors already resolved by yaml
+        default = b.get("", None)
+        non_metal = {k: v for k, v in b.items() if v not in ("metal",)}
+        if default == "metal" and not non_metal:
+            continue
+        if default != "metal":
+            R.add(sec, "FAIL", "ROUTER-BIND",
+                  "%s default space binding is %r (expected metal)" % (n, default))
+            bad += 1
+        elif non_metal:
+            R.add(sec, "WARN", "ROUTER-BIND",
+                  "%s has non-metal endpoint binding(s): %r" % (n, non_metal))
+    if not bad:
+        R.add(sec, "PASS", "ROUTER-BIND",
+              "%d mysql-router app(s) bound to metal" % len(routers))
+
+
+def check_vips(R, apps, rels):
+    """Validate every clustered principal's `vip`: provider+metal pair, mirrored last octet, reserved range (D-020)."""
+    sec = "7. BUNDLEFIX-006 / D-020 (dual provider+metal VIPs)"
+    _, principal_of = map_hacluster(apps, rels)
+    clustered = sorted(principal_of.keys())
+    # set comparison vs expected D-020 clustered set
+    got = set(clustered)
+    exp = set(EXPECTED_CLUSTERED)
+    if got != exp:
+        if exp - got:
+            R.add(sec, "WARN", "VIP-SET", "expected-clustered apps NOT detected as clustered: %s"
+                  % ", ".join(sorted(exp - got)))
+        if got - exp:
+            R.add(sec, "WARN", "VIP-SET", "clustered apps beyond the D-020 set: %s"
+                  % ", ".join(sorted(got - exp)))
+    ok = 0
+    for name in clustered:
+        # A principal named only by a relation (absent or null in `apps`) is reported as VIP-MISS, not a KeyError.
+        opts = ((apps.get(name) or {}).get("options") or {})
+        vip = opts.get("vip")
+        if not vip:
+            R.add(sec, "FAIL", "VIP-MISS", "%s is clustered but has no vip" % name)
+            continue
+        parts = str(vip).split()
+        if len(parts) != 2:
+            R.add(sec, "FAIL", "VIP-DUAL", "%s vip is not dual (got %r)" % (name, vip))
+            continue
+        prov, metal = parts
+        if not in_net(prov, PROVIDER_NET):
+            R.add(sec, "FAIL", "VIP-PROV", "%s provider vip %s not in %s" % (name, prov, PROVIDER_NET))
+            continue
+        if not in_net(metal, METAL_NET):
+            R.add(sec, "FAIL", "VIP-METAL", "%s metal vip %s not in %s" % (name, metal, METAL_NET))
+            continue
+        po, mo = int(prov.split(".")[-1]), int(metal.split(".")[-1])
+        if po != mo:
+            R.add(sec, "FAIL", "VIP-MIRROR", "%s octets differ: provider .%d vs metal .%d" % (name, po, mo))
+            continue
+        if not (VIP_OCTET_MIN <= mo <= VIP_OCTET_MAX):
+            R.add(sec, "FAIL", "VIP-RANGE",
+                  "%s metal vip octet .%d outside reserved %d-%d" % (name, mo, VIP_OCTET_MIN, VIP_OCTET_MAX))
+            continue
+        expected_octet = EXPECTED_CLUSTERED.get(name)
+        if expected_octet is not None and po != expected_octet:
+            R.add(sec, "WARN", "VIP-OCTET", "%s vip octet .%d != D-020 map .%d" % (name, po, expected_octet))
+        ok += 1
+    if ok:
+        R.add(sec, "PASS", "VIP-DUAL", "%d clustered API charm(s) have mirrored dual VIPs in the reserved range" % ok)
+
+
+def check_osd(R, apps):
+    sec = "8. Anti-pattern: ceph-osd osd-devices"
+    osds = [n for n, s in apps.items() if (s or {}).get("charm") == "ceph-osd"]
+    if not osds:
+        R.add(sec, "WARN", "OSD", "no ceph-osd app found")
+        return
+    for n in osds:
+        dev = (apps[n].get("options") or {}).get("osd-devices")
+        if not dev or not isinstance(dev, str) or not dev.strip().startswith("/"):
+            R.add(sec, "FAIL", "OSD-DEV", "%s osd-devices not a real path: %r" % (n, dev))
+        else:
+            note = ""
+            if "/dev/disk/by-" not in dev:
+                note = " (kernel-name; by-path/by-id is harder for bare metal -- Roosevelt note)"
+            R.add(sec, "PASS", "OSD-DEV", "%s osd-devices=%s%s" % (n, dev.strip(), note))
+
+
+def check_ovn(R, apps):
+    sec = "9. Anti-pattern: ovn-chassis mappings (MAC over NIC name)"
+    chassis = [n for n, s in apps.items() if (s or {}).get("charm") == "ovn-chassis"]
+    if not chassis:
+        R.add(sec, "WARN", "OVN", "no ovn-chassis app found")
+        return
+    for n in sorted(chassis):
+        opts = (apps[n].get("options") or {})
+        bim = opts.get("bridge-interface-mappings")
+        if not bim:
+            R.add(sec, "INFO", "OVN-BIM", "%s has no bridge-interface-mappings (expected for octavia-side chassis)" % n)
+            continue
+        if MAC_RE.search(str(bim)):
+            R.add(sec, "PASS", "OVN-BIM", "%s bridge-interface-mappings is MAC-based" % n)
+        else:
+            R.add(sec, "WARN", "OVN-BIM",
+                  "%s bridge-interface-mappings has no MAC (NIC-name? fragile): %r" % (n, bim))
+
+
+def check_os_networks(R, apps, rels):
+    """WARN when a clustered principal sets any os-*-network option; PASS when none do."""
+    sec = "10. D-020: spaces-native (no os-*-network pinning)"
+    _, principal_of = map_hacluster(apps, rels)
+    flagged = 0
+    for name in sorted(principal_of):
+        opts = ((apps.get(name) or {}).get("options") or {})  # principal may be absent/null in apps
+        for k in ("os-internal-network", "os-admin-network", "os-public-network"):
+            if k in opts:
+                R.add(sec, "WARN", "OS-NET", "%s sets %s (D-020 found spaces-native resolve sufficient; verify intent)" % (name, k))
+                flagged += 1
+    if not flagged:
+        R.add(sec, "PASS", "OS-NET", "no clustered charm pins os-*-network (spaces-native, per D-020)")
+
+
+def expected_channel(charm):
+    """Baseline channel for a charm name, or None when the charm is in neither baseline table."""
+    # Explicit per-charm pins win over the shared OpenStack-core channel.
+    if charm in CHANNEL_MATRIX:
+        return CHANNEL_MATRIX[charm]
+    return OPENSTACK_CORE_CHANNEL if charm in OPENSTACK_CORE_CHARMS else None
+
+
+def check_channels_base(R, apps):
+    sec = "11. Channels / base (verified Caracal matrix; WARN-only)"
+    mismatch = 0
+    for name, spec in sorted(apps.items()):
+        spec = spec or {}
+        charm = spec.get("charm")
+        ch = spec.get("channel")
+        exp = expected_channel(charm)
+        if exp and ch and ch != exp:
+            R.add(sec, "WARN", "CHANNEL", "%s (%s) channel=%s expected=%s" % (name, charm, ch, exp))
+            mismatch += 1
+        base = spec.get("base")
+        series = spec.get("series")
+        if base and base != EXPECTED_BASE:
+            R.add(sec, "WARN", "BASE", "%s base=%s expected=%s" % (name, base, EXPECTED_BASE))
+        if series and series not in ("jammy",):
+            R.add(sec, "WARN", "SERIES", "%s series=%s expected=jammy" % (name, series))
+    if not mismatch:
+        R.add(sec, "PASS", "CHANNEL", "no charm deviates from the known Caracal channel matrix")
+
+
+def summary_tables(R, apps, rels):
+    """Add INFO rows: each clustered principal's vip (principals missing from `apps` show vip=None) and the mysql-router apps."""
+    sec = "12. Inventory (informational)"
+    _, principal_of = map_hacluster(apps, rels)
+    for name in sorted(principal_of):
+        R.add(sec, "INFO", "CLUSTERED", "%-26s vip=%s" % (name, ((apps.get(name) or {}).get("options") or {}).get("vip")))
+    routers = sorted(n for n, s in apps.items() if (s or {}).get("charm") == "mysql-router")
+    R.add(sec, "INFO", "ROUTERS", "%d mysql-router apps: %s" % (len(routers), ", ".join(routers)))
+
+
+# --------------------------------------------------------------------------- #
+# Main
+# --------------------------------------------------------------------------- #
+def main():
+    """CLI entry point: load the bundle, run every check, print the report; returns 0 clean, 1 findings, 2 unreadable/unparseable bundle."""
+    ap = argparse.ArgumentParser(description="Comprehensive Caracal bundle reviewer (read-only).")
+    ap.add_argument("bundle", nargs="?", default="bundle.yaml")
+    ap.add_argument("--strict", action="store_true", help="treat WARN as failing for exit code")
+    ap.add_argument("--quiet", action="store_true", help="show only WARN/FAIL")
+    args = ap.parse_args()
+
+    try:
+        with open(args.bundle, "r", encoding="utf-8", errors="replace") as fh:
+            text = fh.read()
+    except OSError as e:  # also directories / permission errors: exit 2, not a traceback with exit 1
+        what = "bundle not found" if isinstance(e, FileNotFoundError) else "cannot read bundle (%s)" % (e.strerror or e)
+        sys.stderr.write("ERROR: %s: %s\n" % (what, args.bundle))
+        return 2
+
+    try:
+        doc = yaml.load(text, Loader=DupKeyLoader)
+    except yaml.YAMLError as e:
+        sys.stderr.write("ERROR: YAML parse failed: %s\n" % e)
+        return 2
+
+    R = Reporter(quiet=args.quiet)
+    print("================ Caracal v1 bundle review: %s ================" % args.bundle)
+    check_ascii(R, text)
+    apps, rels = check_structure(R, doc)
+    if apps is None:
+        R.emit()
+        return 1
+    check_relations(R, apps, rels)
+    check_bindings_phantom(R, apps)
+    check_vault(R, apps, rels)
+    check_hacluster(R, apps, rels)
+    check_memcached(R, apps, rels)
+    check_router_bindings(R, apps)
+    check_vips(R, apps, rels)
+    check_osd(R, apps)
+    check_ovn(R, apps)
+    check_os_networks(R, apps, rels)
+    check_channels_base(R, apps)
+    summary_tables(R, apps, rels)
+
+    R.emit()
+    fail, warn = R.counts["FAIL"] > 0, R.counts["WARN"] > 0
+    if fail or (args.strict and warn):
+        print("\nVERDICT: NOT CLEAN" + (" (--strict: WARN counts)" if (warn and not fail) else ""))
+        return 1
+    print("\nVERDICT: CLEAN" + (" (with WARN review items)" if warn else ""))
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/setup-gitbucket-repo.sh b/setup-gitbucket-repo.sh
deleted file mode 100644
index 35a6136..0000000
--- a/setup-gitbucket-repo.sh
+++ /dev/null
@@ -1,304 +0,0 @@
-#!/usr/bin/env bash
-# setup-gitbucket-repo.sh
-#
-# Initialize this repository locally and push it to a self-hosted GitBucket
-# instance at git.baldurkeep.com (or any GitBucket-compatible host).
-#
-# Usage:
-#   ./setup-gitbucket-repo.sh                       # interactive prompts
-#   ./setup-gitbucket-repo.sh --dry-run             # show what would happen
-#
-# Environment overrides (skip the prompts when set):
-#   GITBUCKET_HOST    e.g. git.baldurkeep.com
-#   GITBUCKET_USER    GitBucket username
-#   GITBUCKET_OWNER   Repo owner (user or group). Defaults to GITBUCKET_USER.
-#   GITBUCKET_REPO    Repo name. Default: vr0-dc0-caracal
-#   GITBUCKET_TOKEN   API token for creating the repo (if it does not exist yet)
-#   GIT_USER_NAME     Local git author name (e.g. "Jesse Austin")
-#   GIT_USER_EMAIL    Local git author email (e.g. jesse.austin@neumatrix.com)
-#   GIT_REMOTE_PROTO  ssh|https — default https
-#   GIT_BRANCH        default branch name — default main
-#
-# Idempotency:
-#   - Detects existing .git directory and skips `git init`
-#   - Detects existing remote 'origin' and adjusts URL if it differs (with confirmation)
-#   - Will not push if there are no commits (nothing to push)
-#
-# What this script does NOT do:
-#   - Store credentials. You will be prompted by git/SSH for auth at push time.
-#   - Create groups/organizations on GitBucket — the owner must exist already.
-#   - Force-push or rewrite history.
-
-set -euo pipefail
-shopt -s inherit_errexit 2>/dev/null || true
-IFS=$'\n\t'
-
-# ----- Helpers --------------------------------------------------------------
-
-err()  { printf '\033[1;31mERROR\033[0m %s\n' "$*" >&2; }
-warn() { printf '\033[1;33mWARN\033[0m  %s\n' "$*" >&2; }
-info() { printf '\033[1;36mINFO\033[0m  %s\n' "$*"; }
-ok()   { printf '\033[1;32mOK\033[0m    %s\n' "$*"; }
-
-die() { err "$*"; exit 1; }
-
-prompt() {
-  # prompt VAR "Question text" "default"
-  local __var=$1 __q=$2 __default=${3:-}
-  local __reply
-  if [[ -n "${!__var:-}" ]]; then
-    # already set via env; skip
-    return 0
-  fi
-  if [[ -n "$__default" ]]; then
-    read -r -p "$__q [$__default]: " __reply || true
-    __reply=${__reply:-$__default}
-  else
-    read -r -p "$__q: " __reply || true
-  fi
-  if [[ -z "$__reply" ]]; then
-    die "Empty response for $__var"
-  fi
-  printf -v "$__var" '%s' "$__reply"
-}
-
-prompt_secret() {
-  local __var=$1 __q=$2
-  local __reply
-  if [[ -n "${!__var:-}" ]]; then
-    return 0
-  fi
-  read -r -s -p "$__q: " __reply || true
-  echo
-  printf -v "$__var" '%s' "$__reply"
-}
-
-confirm() {
-  local __q=$1
-  local __reply
-  read -r -p "$__q [y/N]: " __reply || true
-  [[ "${__reply,,}" == "y" || "${__reply,,}" == "yes" ]]
-}
-
-# ----- Argument parsing -----------------------------------------------------
-
-DRY_RUN=0
-for arg in "$@"; do
-  case "$arg" in
-    --dry-run) DRY_RUN=1 ;;
-    -h|--help)
-      sed -n '1,40p' "$0" | grep -E '^#'
-      exit 0
-      ;;
-    *) die "Unknown argument: $arg" ;;
-  esac
-done
-
-run() {
-  # Wrapper that echoes and (in dry-run) skips execution.
-  # Subshell with IFS=' ' so $* joins args with a space for display only.
-  (IFS=' '; printf '\033[1;90m+ %s\033[0m\n' "$*")
-  if [[ "$DRY_RUN" -eq 0 ]]; then
-    "$@"
-  fi
-}
-
-# ----- Repo root sanity -----------------------------------------------------
-
-SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
-cd "$SCRIPT_DIR"
-
-# Sanity check that we're in the expected repo structure.
-for required in README.md bundle.yaml runbooks docs netbox; do
-  [[ -e "$required" ]] || die "Required path '$required' missing — are you in the repo root?"
-done
-
-ok "Running from repo root: $SCRIPT_DIR"
-
-# ----- Tool checks ----------------------------------------------------------
-
-command -v git  >/dev/null || die "git not installed"
-command -v curl >/dev/null || warn "curl not installed — repo auto-create on GitBucket disabled"
-
-# ----- Gather configuration -------------------------------------------------
-
-prompt GITBUCKET_HOST   "GitBucket host"               "git.baldurkeep.com"
-prompt GITBUCKET_USER   "GitBucket username"
-prompt GITBUCKET_OWNER  "Repo owner (user or group)"   "$GITBUCKET_USER"
-prompt GITBUCKET_REPO   "Repo name"                     "openstack-caracal-ipv4"
-prompt GIT_USER_NAME    "Git author name"
-prompt GIT_USER_EMAIL   "Git author email"
-prompt GIT_REMOTE_PROTO "Remote protocol (ssh|https)"  "https"
-prompt GIT_BRANCH       "Default branch"               "main"
-
-case "$GIT_REMOTE_PROTO" in
-  ssh)
-    REMOTE_URL="git@${GITBUCKET_HOST}:${GITBUCKET_OWNER}/${GITBUCKET_REPO}.git"
-    ;;
-  https)
-    REMOTE_URL="https://${GITBUCKET_HOST}/git/${GITBUCKET_OWNER}/${GITBUCKET_REPO}.git"
-    ;;
-  *)
-    die "GIT_REMOTE_PROTO must be 'ssh' or 'https' (got: $GIT_REMOTE_PROTO)"
-    ;;
-esac
-
-cat <<EOF
-
------ Configuration --------------------------------------------------------
-GitBucket host  : $GITBUCKET_HOST
-GitBucket user  : $GITBUCKET_USER
-Repo owner      : $GITBUCKET_OWNER
-Repo name       : $GITBUCKET_REPO
-Git author      : $GIT_USER_NAME <$GIT_USER_EMAIL>
-Remote URL      : $REMOTE_URL
-Default branch  : $GIT_BRANCH
-Dry-run         : $([[ $DRY_RUN -eq 1 ]] && echo YES || echo no)
-----------------------------------------------------------------------------
-EOF
-
-confirm "Proceed?" || die "Aborted by user"
-
-# ----- Create repo on GitBucket via API (if token provided) -----------------
-
-GITBUCKET_API_BASE="https://${GITBUCKET_HOST}/api/v3"
-REPO_API_URL="${GITBUCKET_API_BASE}/repos/${GITBUCKET_OWNER}/${GITBUCKET_REPO}"
-
-create_repo_via_api() {
-  if [[ -z "${GITBUCKET_TOKEN:-}" ]]; then
-    info "GITBUCKET_TOKEN not set — skipping API repo creation"
-    info "If the repo does not exist on GitBucket, create it manually now:"
-    info "  https://${GITBUCKET_HOST}/${GITBUCKET_OWNER}"
-    info "  → New repository → name: ${GITBUCKET_REPO}, do NOT initialize with README"
-    return 0
-  fi
-
-  if ! command -v curl >/dev/null; then
-    warn "curl missing; cannot call API"
-    return 0
-  fi
-
-  info "Checking if repo already exists on GitBucket..."
-  local http_code
-  http_code=$(curl -sS -o /dev/null -w '%{http_code}' \
-    -H "Authorization: token ${GITBUCKET_TOKEN}" \
-    "$REPO_API_URL" || true)
-
-  case "$http_code" in
-    200)
-      ok "Repo ${GITBUCKET_OWNER}/${GITBUCKET_REPO} already exists on GitBucket"
-      return 0
-      ;;
-    404)
-      info "Repo does not exist — creating via API"
-      ;;
-    *)
-      warn "Unexpected API response code: $http_code (continuing)"
-      return 0
-      ;;
-  esac
-
-  local create_url
-  if [[ "$GITBUCKET_OWNER" == "$GITBUCKET_USER" ]]; then
-    # User-owned repo
-    create_url="${GITBUCKET_API_BASE}/user/repos"
-  else
-    # Group-owned repo
-    create_url="${GITBUCKET_API_BASE}/orgs/${GITBUCKET_OWNER}/repos"
-  fi
-
-  local payload
-  payload=$(printf '{"name":"%s","description":"%s","private":true,"auto_init":false}' \
-    "$GITBUCKET_REPO" \
-    "Charmed OpenStack Caracal 2024.1 — IPv4-only testcloud deployment (VR0 DC0 v1). IPv6/dual-stack tracked separately as v2.")
-
-  if [[ "$DRY_RUN" -eq 1 ]]; then
-    info "[dry-run] would POST to $create_url with payload: $payload"
-    return 0
-  fi
-
-  http_code=$(curl -sS -o /tmp/gitbucket-create.json -w '%{http_code}' \
-    -X POST \
-    -H "Authorization: token ${GITBUCKET_TOKEN}" \
-    -H "Content-Type: application/json" \
-    -d "$payload" \
-    "$create_url" || true)
-
-  case "$http_code" in
-    200|201)
-      ok "Repo created: ${GITBUCKET_OWNER}/${GITBUCKET_REPO}"
-      ;;
-    *)
-      err "Repo creation failed (HTTP $http_code). Response:"
-      cat /tmp/gitbucket-create.json >&2 || true
-      die "Aborting before git operations"
-      ;;
-  esac
-}
-
-create_repo_via_api
-
-# ----- Git init -------------------------------------------------------------
-
-if [[ -d .git ]]; then
-  info ".git exists — skipping git init"
-else
-  run git init -b "$GIT_BRANCH"
-fi
-
-run git config user.name  "$GIT_USER_NAME"
-run git config user.email "$GIT_USER_EMAIL"
-
-# ----- Remote setup ---------------------------------------------------------
-
-if git remote get-url origin >/dev/null 2>&1; then
-  EXISTING_URL=$(git remote get-url origin)
-  if [[ "$EXISTING_URL" != "$REMOTE_URL" ]]; then
-    warn "Existing 'origin' remote URL: $EXISTING_URL"
-    warn "Desired URL:                  $REMOTE_URL"
-    if confirm "Update remote URL?"; then
-      run git remote set-url origin "$REMOTE_URL"
-    fi
-  else
-    ok "Remote 'origin' already set correctly"
-  fi
-else
-  run git remote add origin "$REMOTE_URL"
-fi
-
-# ----- Stage + commit -------------------------------------------------------
-
-run git add .
-
-if [[ "$DRY_RUN" -eq 0 ]]; then
-  if git diff --staged --quiet; then
-    info "No staged changes — nothing to commit"
-  else
-    if [[ -z "$(git log --oneline -1 2>/dev/null || true)" ]]; then
-      MSG="Initial commit — VR0 DC0 Omega Cloud Caracal repo scaffolding"
-    else
-      MSG="Update repo content"
-    fi
-    run git commit -m "$MSG"
-  fi
-else
-  info "[dry-run] would commit (skipping)"
-fi
-
-# ----- Push ----------------------------------------------------------------
-
-if [[ "$DRY_RUN" -eq 1 ]]; then
-  info "[dry-run] would push to origin/$GIT_BRANCH"
-  exit 0
-fi
-
-if [[ -z "$(git log --oneline -1 2>/dev/null || true)" ]]; then
-  info "No commits to push"
-  exit 0
-fi
-
-info "Pushing to origin/$GIT_BRANCH (you may be prompted for credentials)..."
-run git push -u origin "$GIT_BRANCH"
-
-ok "Push complete"
-ok "Repository ready at: https://${GITBUCKET_HOST}/${GITBUCKET_OWNER}/${GITBUCKET_REPO}"