diff --git a/bundle.yaml b/bundle.yaml index 8e2aca9..93c676b 100644 --- a/bundle.yaml +++ b/bundle.yaml @@ -4,30 +4,30 @@ # Generated: 2026-05-22 (rebuild revision 2026-06-01, bundle-cleanup change-set) # Replaces: bundle-pre-destroy.yaml (Bobcat 2023.2) # Charm channels: verified against Charmhub 2026-05-22 (see Caracal_Rebuild handoff D-002) -# Bindings: public:provider-vip (D-057), else:metal-* for API charms; all-metal for backend charms. +# Bindings: public:provider, else:metal for API charms; all-metal for backend charms. # Ceph data nets via public/cluster BINDINGS on ceph-mon/ceph-osd (these provision the # container/host NICs; ceph-*-network config would NOT). Ceph CLIENTS bind ceph->storage, # and each subordinate's storage/data binding is mirrored on its PRINCIPAL (subset rule). (C2) # Endpoints: IP-ONLY -- os-public-hostname dropped on all API charms; the dual VIPs ARE the -# catalog endpoints (public 10.12.8.N / internal+admin 10.12.12.N; D-057). Vault issues +# catalog endpoints (public 10.12.4.N / internal+admin 10.12.8.N). Vault issues # per-VIP IP-SAN certs. No control-plane DNS dependency. (B5) # HA chain: hacluster subordinates + dual VIPs + :ha relations ACTIVE for 11 API charms # (10 prior + ceph-radosgw, un-deferred). VIPs front-loaded into the MAAS-reserved -# /26: provider 10.12.4.2-.63, metal 10.12.12.2-.63 (supersedes .224-.254). (B1) +# /26: provider 10.12.4.2-.63, metal 10.12.8.2-.63 (supersedes .224-.254). (B1) # Vault: single unit, MYSQL storage backend (via vault-mysql-router). etcd + easyrsa # REMOVED -- the etcd backend was never used (live storage = mysql) and is moot at # 1 unit; HA backend (Raft vs etcd) is a Roosevelt rehearsal item. (C1; revises D-006) # Ceph networks: FULL separation via network-space BINDINGS -- ceph-mon/ceph-osd public->storage -# (10.12.20.0/22), ceph-osd cluster->replication (10.12.20.0/22). Bindings, NOT +# (10.12.16.0/22), ceph-osd cluster->replication (10.12.20.0/22). 
Bindings, NOT # ceph-*-network config, so the LXD-contained mon actually gets a storage NIC. # Clients bind ceph->storage; container principals carry it too (subset rule). (C2) # Magnum: Layer A only -- CAPI driver graft is Layer B (runbooks/phase-06..08) # Octavia: lb-mgmt PKI options supplied via overlays/octavia-pki.yaml (gitignored). # Amphora-pipeline options baked (use-internal-endpoints etc.). (B4) -# OVN tunnels: geneve overlay on the DATA space (10.12.16.0/22) -- ovn-chassis + ovn-chassis-octavia +# OVN tunnels: geneve overlay on the DATA space (10.12.12.0/22) -- ovn-chassis + ovn-chassis-octavia # 'data' binding; their principals also carry data (nova-compute:neutron-plugin bare-metal, # octavia:ovsdb-cms provisions the container NIC) per the subset rule. Prereq: enp8s0 -# link-subnet to 10.12.16.4N (rebuild-prep, machines Ready). +# link-subnet to 10.12.12.4N (rebuild-prep, machines Ready). # Resources: omitted -- let charms use latest available resource revisions # ============================================================ @@ -72,8 +72,7 @@ # Network-space bindings (D-052): EXPLICIT per-application blocks, no anchors. 
# "" -> metal-admin (operator/MAAS/monitoring; admin API; default) # internal/shared-db/amqp/certificates/cluster/identity/ovsdb -> metal-internal -# public -> provider-vip (public API VIPs, tagged VID 104; D-057) -# floating IPs -> provider-public (FIP pool .5-.7 stays on the untagged provider plane) +# public -> provider-public (public API + floating IPs) # ceph public -> storage ; ceph cluster -> replication # geneve overlay -> data-tenant (nova-compute:neutron-plugin, ovn-chassis:data, # ovn-chassis-octavia:data, octavia:ovsdb-cms) @@ -151,7 +150,7 @@ num_units: 1 # 3 on Roosevelt (D-009) to: [lxd:8] options: - vip: "10.12.8.50 10.12.12.50 10.12.16.50" # B1 front-loaded VIP; IS the catalog endpoint (B5, no os-public-hostname) + vip: "10.12.4.50 10.12.8.50 10.12.12.50" # B1 front-loaded VIP; IS the catalog endpoint (B5, no os-public-hostname) use-policyd-override: true # as-built reconcile 2026-06-09 (origin untraced -- Review-later) bindings: '': metal-admin @@ -166,7 +165,7 @@ internal: metal-internal keystone-fid-service-provider: metal-internal keystone-middleware: metal-internal - public: provider-vip + public: provider-public shared-db: metal-internal websso-trusted-dashboard: metal-internal constraints: arch=amd64 @@ -190,7 +189,7 @@ num_units: 1 to: [lxd:11] options: - vip: "10.12.8.53 10.12.12.53 10.12.16.53" # B1 + vip: "10.12.4.53 10.12.8.53 10.12.12.53" # B1 image-conversion: true # as-built; image conversion enabled (raw on Ceph-backed glance) bindings: '': metal-admin @@ -204,7 +203,7 @@ image-service: metal-internal internal: metal-internal object-store: metal-internal - public: provider-vip + public: provider-public shared-db: metal-internal storage-backend: metal-internal constraints: arch=amd64 @@ -246,7 +245,7 @@ options: console-access-protocol: novnc network-manager: Neutron - vip: "10.12.8.56 10.12.12.56 10.12.16.56" # B1 + vip: "10.12.4.56 10.12.8.56 10.12.12.56" # B1 bindings: '': metal-admin amqp: metal-internal @@ -265,7 +264,7 @@ 
neutron-api: metal-internal nova-cell-api: metal-internal placement: metal-internal - public: provider-vip + public: provider-public shared-db: metal-internal shared-db-cell: metal-internal constraints: arch=amd64 @@ -315,7 +314,7 @@ num_units: 1 to: [lxd:11] options: - vip: "10.12.8.59 10.12.12.59 10.12.16.59" # B1 + vip: "10.12.4.59 10.12.8.59 10.12.12.59" # B1 bindings: '': metal-admin amqp: metal-internal @@ -325,7 +324,7 @@ identity-service: metal-internal internal: metal-internal placement: metal-internal - public: provider-vip + public: provider-public shared-db: metal-internal constraints: arch=amd64 @@ -351,7 +350,7 @@ enable-ml2-port-security: true flat-network-providers: physnet1 neutron-security-groups: true - vip: "10.12.8.55 10.12.12.55 10.12.16.55" # B1 + vip: "10.12.4.55 10.12.8.55 10.12.12.55" # B1 bindings: '': metal-admin amqp: metal-internal @@ -363,7 +362,7 @@ neutron-api: metal-internal neutron-plugin-api: metal-internal neutron-plugin-api-subordinate: metal-internal - public: provider-vip + public: provider-public shared-db: metal-internal constraints: arch=amd64 @@ -410,6 +409,7 @@ ovn-bridge-mappings: physnet1:br-ex prefer-chassis-as-gw: true # B2 -- elects gateway chassis so tenant routers get external egress bridge-interface-mappings: >- + br-ex:52:54:00:3d:fd:54 br-ex:52:54:00:9d:63:77 br-ex:52:54:00:89:7f:ce br-ex:52:54:00:99:fc:c2 @@ -438,7 +438,7 @@ options: block-device: None glance-api-version: 2 - vip: "10.12.8.52 10.12.12.52 10.12.16.52" # B1 + vip: "10.12.4.52 10.12.8.52 10.12.12.52" # B1 bindings: '': metal-admin amqp: metal-internal @@ -452,7 +452,7 @@ identity-service: metal-internal image-service: metal-internal internal: metal-internal - public: provider-vip + public: provider-public shared-db: metal-internal storage-backend: metal-internal constraints: arch=amd64 # owns the relation -- but the binding still provisions the NIC. 
@@ -520,7 +520,7 @@ to: [lxd:8] options: source: *ceph-source - vip: "10.12.8.60 10.12.12.60 10.12.16.60" # B1 -- radosgw HA un-deferred for Roosevelt fidelity (decorative HA on testcloud) + vip: "10.12.4.60 10.12.8.60 10.12.12.60" # B1 -- radosgw HA un-deferred for Roosevelt fidelity (decorative HA on testcloud) bindings: '': metal-admin certificates: metal-internal @@ -531,7 +531,7 @@ internal: metal-internal mon: storage object-store: metal-internal - public: provider-vip + public: provider-public radosgw-user: metal-internal s3: metal-internal constraints: arch=amd64 @@ -547,7 +547,7 @@ to: [lxd:10] options: debug: "false" - vip: "10.12.8.58 10.12.12.58 10.12.16.58" # B1 -- browse HTTPS by IP (B5); ALLOWED_HOSTS must permit the VIP IP (verify at deploy) + vip: "10.12.4.58 10.12.8.58 10.12.12.58" # B1 -- browse HTTPS by IP (B5); ALLOWED_HOSTS must permit the VIP IP (verify at deploy) bindings: '': metal-admin application-dashboard: metal-internal @@ -557,7 +557,7 @@ dashboard-plugin: metal-internal ha: metal-internal identity-service: metal-internal - public: provider-vip + public: provider-public shared-db: metal-internal website: metal-internal websso-fid-service-provider: metal-internal @@ -595,7 +595,7 @@ # juju deploy ./bundle.yaml \ # --overlay overlays/vr0-dc0-testcloud.yaml \ # --overlay overlays/octavia-pki.yaml - vip: "10.12.8.57 10.12.12.57 10.12.16.57" # B1 + vip: "10.12.4.57 10.12.8.57 10.12.12.57" # B1 bindings: '': metal-admin amqp: metal-internal @@ -608,7 +608,7 @@ neutron-openvswitch: metal-internal ovsdb-cms: data-tenant ovsdb-subordinate: metal-internal - public: provider-vip + public: provider-public shared-db: metal-internal constraints: arch=amd64 # subset for the subordinate's data binding (subset rule). 
@@ -654,7 +654,7 @@ to: [lxd:11] options: openstack-origin: *openstack-origin - vip: "10.12.8.51 10.12.12.51 10.12.16.51" # B1 + vip: "10.12.4.51 10.12.8.51 10.12.12.51" # B1 bindings: '': metal-admin amqp: metal-internal @@ -663,7 +663,7 @@ ha: metal-internal identity-service: metal-internal internal: metal-internal - public: provider-vip + public: provider-public secrets: metal-internal shared-db: metal-internal constraints: arch=amd64 @@ -707,7 +707,7 @@ options: openstack-origin: *openstack-origin region: RegionOne - vip: "10.12.8.54 10.12.12.54 10.12.16.54" # B1 + vip: "10.12.4.54 10.12.8.54 10.12.12.54" # B1 bindings: '': metal-admin amqp: metal-internal @@ -716,7 +716,7 @@ ha: metal-internal identity-service: metal-internal internal: metal-internal - public: provider-vip + public: provider-public shared-db: metal-internal constraints: arch=amd64 diff --git a/docs/design-decisions.md b/docs/design-decisions.md index a6671af..88f8308 100644 --- a/docs/design-decisions.md +++ b/docs/design-decisions.md @@ -819,7 +819,7 @@ ## D-057: provider-vip plane -- separate tagged, routed plane for public API VIPs (2026-06-29) -**Status:** DECIDED. Full record + D-003B amendment: `docs/D-057-DECIDED-append.md`. +**Status:** SUPERSEDED by D-060 (Pattern A revert, 2026-06-29) -- the provider-vip plane is abandoned; public API VIPs return to provider-public. [Originally DECIDED.] Full record + D-003B amendment: `docs/D-057-DECIDED-append.md`. Root cause of the phase-06 FIP-unreachability blocker: API LXD containers bind `public` to provider-public (untagged enp1s0); Juju bridges enp1s0 into a Linux bridge, starving @@ -833,9 +833,11 @@ ## D-058: full plane renumber -- clean fabric-grouped /22 scheme (2026-06-29) -**Status:** DECIDED (operator). Full map, jumphost ordering trap, NetBox-apex note, and -the committed-foundation cascade: `docs/D-058-renumber.md`. Supersedes the D-057 minimal- -delta placement of provider-vip at 10.12.24.0/22; resolves R4 (oob). 
+**Status:** SUPERSEDED by D-060 (Pattern A revert, 2026-06-29) -- the plane renumber is +abandoned; the cloud stays on the D-052/D-053 scheme. [Originally DECIDED (operator).] Full +map, jumphost ordering trap, NetBox-apex note, and the committed-foundation cascade: +`docs/D-058-renumber.md`. Supersedes the D-057 minimal-delta placement of provider-vip at +10.12.24.0/22; resolves R4 (oob). Cloud-wide re-IP for Roosevelt addressing fidelity (contiguous /22 blocks grouped by fabric), executed as a teardown/redeploy (no in-place re-CIDR, no transient overlap). @@ -900,4 +902,75 @@ parked pending the sheets, with NIC bonding as an alternative if the hardware offers it. **Status:** Adopted 2026-06-29. Pending input: Roosevelt inbound-server NIC count. -**Related:** D-057 (provider-vip plane), D-058 (plane renumber), D-052 (space inversion). \ No newline at end of file +**Related:** D-057 (provider-vip plane), D-058 (plane renumber), D-052 (space inversion). +## D-060: revert to Pattern A on the hyperconverged test cloud -- supersedes D-057 + D-058 (2026-06-29) + +**Decision:** Revert the provider-NIC architecture to **Pattern A** (MAAS builds the OVS +bridge `br-ex` on provider NIC `enp1s0`; the host static lands on `br-ex`; the API LXD +containers' `public` endpoint AND ovn-chassis share that one OVS `br-ex`) and KEEP the +existing D-052/D-053 plane scheme. **D-057 (provider-vip plane) and D-058 (full plane +renumber) are SUPERSEDED** -- both were responses to a self-inflicted regression, not to a +genuine architectural requirement. + +**Root cause (the pivot).** The phase-06 floating-IP-unreachability blocker was introduced by +this Claude lineage, not by Pattern A. The working pre-teardown cloud ran Pattern A. A prior +instance advised switching to **Pattern B** (raw `enp1s0`, charm-built `br-ex`) on the +rebuild. 
On a hyperconverged node -- where the API LXD containers and ovn-chassis share one +provider NIC -- Pattern B makes Juju build a competing Linux bridge `br-enp1s0` that captures +`enp1s0` before ovn-chassis can enslave it into OVS `br-ex`, starving `br-ex` of carrier and +darkening the floating-IP plane (OVN's gateway ARP responder goes dead). D-057 (move the +public API VIPs to a tagged `provider-vip` plane, VID 104, to free untagged `enp1s0` for +`br-ex`) and D-058 (cloud-wide renumber) were built to work around that breakage. Reverting +from Pattern B to Pattern A removes the cause, so the workarounds are no longer needed. + +**No re-CIDR.** The live MAAS is already on D-052/D-053 (verified 2026-06-29: juju / lxd / +capi-mgmt on metal-admin 10.12.8.x; openstack0 data-tenant 10.12.16.40; openstack2 storage +10.12.32.42). D-058's renumber was never applied to the running cloud. So the revert is a +teardown + Pattern A re-carve + redeploy on the EXISTING subnet scheme -- no subnet migration, +no DHCP change, no `provider-vip` plane. + +**Node-role learning (Roosevelt is NOT affected).** The collision is specific to +hyperconverged topology. Roosevelt is dedicated-role: Gateway Nodes run `br-ex` / Neutron and +Controller Nodes run the API on SEPARATE machines, so no single NIC is shared between the API +containers and ovn-chassis -- the Linux-bridge-vs-OVS race cannot occur there. On Roosevelt, +Pattern B (charm-built `br-ex` on a dedicated gateway uplink) is the clean fit; the test cloud +rehearses the STACK, not the node-role split. The metal-internal VID-103 trunk, which on the +test cloud rides one shared metal NIC, becomes a real trunk to the gateway / controller / +compute access ports at Roosevelt. 
+ +**Files (this revert):** +- `bundle.yaml` -- reverted to the `bf7de5a` provider config: `public: provider-public` x11, + VIP triple `10.12.4.5x / 10.12.8.5x / 10.12.12.5x`, all four `br-ex` MACs in ovn-chassis + `bridge-interface-mappings` (the openstack0 MAC `52:54:00:3d:fd:54` that D-057 trimmed is + restored). The bundle diff `bf7de5a..HEAD` was verified to be PURELY D-057/D-058, so the + revert is a clean wholesale checkout of that file. +- `scripts/lib-net.sh` -- CIDR / VIP constants back to D-052/D-053; `PROVIDER_VIP_CIDR` / + `PROVIDER_VIP_VID` dropped; `provider-vip` added to `STALE_SPACES` (must-be-absent guard). +- `scripts/carve-host-interfaces.sh` -- provider section is now Pattern A (`build_ovs_brex`: + unlink commissioning link -> create OVS `br-ex` on enp1s0 -> host static on `br-ex`); the + `enp1s0.104` / `br-prov-api` provider-vip stack and the VID-104 assertion are removed; + metal / data statics back to D-052/D-053 (br-metal .8, br-internal .12, data .16). +- `scripts/provider-bundle-check.py` -- NEW fail-closed deploy gate (provider-public x11, + `.4/.8/.12` triple octet 50-60, all 4 chassis MACs present); REPLACES the retired + `scripts/d057-bundle-check.py`. Verified PASS against the reverted bundle. +- `scripts/phase-00-maas-standup.sh` -- target table + narrative retargeted to D-052/D-053; + a dry-run now reports "no drift" against the live cloud (refuse-to-clobber guard retained + for a fresh cloud / Roosevelt). +- `scripts/phase-00-teardown.sh` -- header + next-pointer updated (Pattern A + re-carve / standup-verify, not re-CIDR). Logic unchanged (dynamic system_id resolution, + capi-mgmt delete, management-substrate exclusion). +- `runbooks/phase-00-teardown-maas-reset.md` -- rewritten script-driven and current + (deletes capi-mgmt, dynamic system_ids, D-052/D-053 CIDRs, Pattern A re-carve, OSD wipe, + 8_lbaas net removal). The prior file was pre-D-052 stale (five-space scheme, hardcoded + system_ids, "leave capi-mgmt Ready"). 
+ +**Retired (git rm):** `scripts/phase-00-maas-recidr.sh` (D-058 re-CIDR; the live cloud is +already D-052/D-053), `scripts/provider-vip-standup.sh` (D-057), `scripts/d057-bundle-check.py` +(replaced), `runbooks/jumphost-provider-vip-gateway.md` (D-057), +`runbooks/provider-vip-maas-standup.md` (D-057), `runbooks/phase-00-maas-reconfigure.md` +(D-058 reconfigure spine). + +**Status:** Adopted 2026-06-29 (operator "Go"). Supersedes D-057, D-058. +**Related:** D-052 / D-053 (plane scheme, retained), D-003B (API + FIP L2 co-location, +restored), D-059 (NIC budget retained; its inventory's "enp1s0 = provider-public + provider-vip" +row is now just provider-public -- the physical five-NIC count is unchanged). diff --git a/runbooks/jumphost-provider-vip-gateway.md b/runbooks/jumphost-provider-vip-gateway.md deleted file mode 100644 index d6e81b7..0000000 --- a/runbooks/jumphost-provider-vip-gateway.md +++ /dev/null @@ -1,149 +0,0 @@ -# jumphost provider-vip L3 gateway (virbr1.104 = 10.12.8.1) -- D-057 - -Provisions the L3 gateway that makes provider <-> provider-vip routing real on the -jumphost (vopenstack-jesse, 10.17.11.246). provider-vip (10.12.8.0/22, VID 104) -rides the SAME libvirt bridge as provider (virbr1), tagged; the jumphost already -routes between its directly-connected planes (ip_forward=1), so once virbr1.104 = -10.12.8.1 exists, tenant SNAT (on provider 10.12.5-7) reaches the API VIPs on -10.12.8.50-60 and back. - -WHY A RUNBOOK, NOT A SCRIPT: this is a one-time, consequential host change. The real -risk is how persistence interacts with libvirt (virbr1 is libvirt-managed, created at -libvirtd start) -- which a fixture test cannot exercise. It is also NOT portable to -Roosevelt (no virbr1 there; the provider-vip gateway is a physical router/SVI). So it -is gated and human-run, per the project's "human gates own consequential mutations". - -NOT required for: the MAAS plane stand-up, or the carve. 
The MAAS subnet records -gateway_ip=10.12.8.1 as metadata regardless. REQUIRED before: D-011 #3 (tenant -> -API reachability) and any provider<->provider-vip traffic test. - -================================================================================ -## PHASE 1 -- AUDIT (read-only). Run, paste back; this picks the persistence method. -================================================================================ - ---- BEGIN runbook block: gw-01-audit (RUN ON jumphost) --- -echo "=== G1: virbr1 must pass tagged frames (VID 104). MUST be 0 ===" -cat /sys/class/net/virbr1/bridge/vlan_filtering 2>/dev/null \ - || echo "WARN: virbr1 has no bridge/vlan_filtering node -- investigate before proceeding" - -echo "=== G2: ip_forward must be 1 ===" -cat /proc/sys/net/ipv4/ip_forward - -echo "=== G3: virbr1 detail (is it a bridge? up? who owns it?) ===" -ip -d link show virbr1 | sed -n '1,6p' -ip -br addr show virbr1 - -echo "=== G4: libvirt 1_provider net -- autostart + forward mode (NAT double-NAT note) ===" -sudo virsh net-info 1_provider 2>/dev/null -sudo virsh net-dumpxml 1_provider 2>/dev/null | sed -n '1,30p' - -echo "=== G5: is virbr1 touched by netplan? (decides systemd-vs-netplan persistence) ===" -ls -1 /etc/netplan/ 2>/dev/null -sudo grep -RnE 'virbr1|10\.12\.4\.1|10\.12\.8\.' /etc/netplan/ 2>/dev/null || echo "netplan: no virbr1 / .8 references" - -echo "=== G6: must NOT already exist ===" -ip -br addr show | grep -E 'virbr1\.104|10\.12\.8\.' || echo "clean: no virbr1.104 / .8 yet" ---- END runbook block: gw-01-audit --- - -STOP. Decision from the audit: -- G1 != 0 -> STOP. VID 104 will not traverse virbr1; the tagged-secondary approach - needs rework. This is the same hard gate as the MAAS stand-up. -- G5 shows virbr1 already managed in netplan -> prefer the NETPLAN persistence - variant (Phase 3B) to avoid two managers fighting. 
-- G5 shows virbr1 is purely libvirt (the expected case) -> use the SYSTEMD ONESHOT - variant (Phase 3A): it orders cleanly after libvirtd and won't race a netplan that - doesn't manage virbr1. -- G4 autostart != yes -> enable it (`sudo virsh net-autostart 1_provider`) so virbr1 - exists at boot before the gateway unit runs. - -================================================================================ -## PHASE 2 -- RUNTIME (reversible; proves it works before persisting) -================================================================================ - -GATE. Brings the gateway up immediately (lost on reboot -- Phase 3 persists it). -Fully reversible via the rollback block. ---- BEGIN runbook block: gw-02-runtime --- -sudo ip link add link virbr1 name virbr1.104 type vlan id 104 -sudo ip addr add 10.12.8.1/22 dev virbr1.104 -sudo ip link set virbr1.104 up -ip -br addr show virbr1.104 -ip route show 10.12.8.0/22 ---- END runbook block: gw-02-runtime --- - -ROLLBACK (if anything looks wrong): - sudo ip link del virbr1.104 - -TEST (after the MAAS plane exists and a host carries a .8 static, e.g. post-carve): - ping -c2 10.12.8.1 # the gateway itself - ping -c2 10.12.8.40 # a host's br-prov-api static (if carved) - # from a provider-plane host, confirm .8 is reachable via the jumphost route - -NOTE (libvirt NAT, cosmetic): 1_provider is forward mode=nat, so .4<->.8 traffic may -be masqueraded to the jumphost's address. It still works statefully (the API does not -care about source IP). If you later want symmetric, un-NATed provider<->provider-vip -routing, add an iptables RETURN rule ahead of the libvirt masquerade for -10.12.4.0/22 <-> 10.12.8.0/22 -- optional, not needed for v1. 
- -================================================================================ -## PHASE 3 -- PERSISTENCE (pick ONE per the Phase-1 decision) -================================================================================ - -### 3A -- systemd oneshot (RECOMMENDED for libvirt-managed virbr1) -Orders after libvirtd; idempotent (deletes any stale virbr1.104 first). ---- BEGIN runbook block: gw-03a-systemd --- -sudo tee /etc/systemd/system/provider-vip-gw.service >/dev/null <<'UNIT' -[Unit] -Description=provider-vip L3 gateway (virbr1.104 = 10.12.8.1) -- D-057 -After=libvirtd.service network-online.target -Wants=network-online.target -Requires=libvirtd.service -[Service] -Type=oneshot -RemainAfterExit=yes -ExecStartPre=-/sbin/ip link del virbr1.104 -ExecStart=/sbin/ip link add link virbr1 name virbr1.104 type vlan id 104 -ExecStart=/sbin/ip addr add 10.12.8.1/22 dev virbr1.104 -ExecStart=/sbin/ip link set virbr1.104 up -ExecStop=/sbin/ip link del virbr1.104 -[Install] -WantedBy=multi-user.target -UNIT -sudo systemctl daemon-reload -sudo systemctl enable --now provider-vip-gw.service -systemctl --no-pager status provider-vip-gw.service | sed -n '1,6p' -ip -br addr show virbr1.104 ---- END runbook block: gw-03a-systemd --- - -Persistence test (the real proof): `sudo reboot`, then after it returns -`ip -br addr show virbr1.104` must show 10.12.8.1/22 UP. (libvirt 1_provider must be -autostart -- see G4 -- so virbr1 exists when the unit runs.) - -ROLLBACK 3A: - sudo systemctl disable --now provider-vip-gw.service - sudo rm -f /etc/systemd/system/provider-vip-gw.service && sudo systemctl daemon-reload - sudo ip link del virbr1.104 2>/dev/null || true - -### 3B -- netplan (ONLY if G5 showed virbr1 already managed by netplan) -Add a vlans stanza. Risk: if virbr1 is NOT up when netplan runs at boot, the vlan -fails -- which is exactly why 3A is preferred for a libvirt bridge. Use only if your -jumphost already manages virbr1 via netplan. 
- # in the relevant /etc/netplan/*.yaml, under network:: - # vlans: - # virbr1.104: - # id: 104 - # link: virbr1 - # addresses: [10.12.8.1/22] - # then: sudo netplan try (auto-reverts in 120s if unreachable), then sudo netplan apply -ROLLBACK 3B: remove the stanza; sudo netplan apply. - -================================================================================ -## PHASE 4 -- VERIFY -================================================================================ ---- BEGIN runbook block: gw-04-verify --- -ip -br addr show virbr1.104 # 10.12.8.1/22, UP -ip route show 10.12.8.0/22 # directly-connected via virbr1.104 -cat /proc/sys/net/ipv4/ip_forward # 1 ---- END runbook block: gw-04-verify --- - -DONE when virbr1.104 = 10.12.8.1/22 is UP, survives a reboot (3A), and a provider-plane -host can reach 10.12.8.x through the jumphost. diff --git a/runbooks/phase-00-maas-reconfigure.md b/runbooks/phase-00-maas-reconfigure.md deleted file mode 100644 index 7d8e330..0000000 --- a/runbooks/phase-00-maas-reconfigure.md +++ /dev/null @@ -1,63 +0,0 @@ -# Phase 00 -- MAAS reconfigure to D-058 - -Sequences the gated steps that take the live D-052/053 cloud to the D-058 plane -scheme, then hands off to deploy. Scripts do the deterministic/idempotent work; -the destructive juju + libvirt steps stay human-gated (runbooks), by design. - -Precondition check (read-only): `scripts/phase-00-maas-standup.sh` should report -the three DRIFT planes (.8 metal-admin -> provider-vip, .12 metal-internal -> -metal-admin, .16 data-tenant -> metal-internal). If it reports no drift, the cloud -is already on D-058 and only the deploy remains. - -## Step 1 -- Teardown (gated runbook: runbooks/phase-00-teardown-maas-reset.md) -Destroy the `openstack` Juju model and release openstack0-3 to MAAS Ready. The -hosts MUST be released so the migrating subnets carry no live interface links -- -the re-CIDR deletes those subnets, and MAAS refuses to delete a subnet with live -allocations. 
`juju destroy-model` is typed by the operator (not auto-scripted). -GATE: `juju models` shows no `openstack`; openstack0-3 are Ready. - -## Step 2 -- Audit (read-only) -``` -scripts/phase-00-maas-standup.sh # expect 3 DRIFT lines (.8/.12/.16) -scripts/phase-00-maas-recidr.sh # audit: migration plan + metal/data fabric ids -``` -Eyeball the fabric ids and confirm no live IP allocations are flagged on the -migrating subnets. Change nothing here. - -## Step 3 -- Re-CIDR (gated, destructive) -``` -scripts/phase-00-maas-recidr.sh --apply -``` -Deletes the old .8/.12/.16 subnets (reserved ranges first), then recreates -.12/.16/.20 on the SAME fabrics/VLANs (reuse-in-place; spaces inherited via the -persisted VLANs). Collision-safe: all deletes precede all creates. If a delete is -refused (live links remain), clear them (release/delete the machine interfaces) -and re-run -- the script is idempotent (already-migrated planes SKIP). - -## Step 4 -- Standup (gated) -``` -scripts/phase-00-maas-standup.sh --apply # provider-vip .8 (VID 104) + gateways + dns + ALL reserves -scripts/phase-00-maas-standup.sh # verify: all-SKIP, no drift -``` -The standup is the single MAAS-address authority (topology + VIP bands + FIP pool -+ mgmt reserves). `phase-00-maas-carve.sh` is retired. - -## Step 5 -- Jumphost bridges (gated host runbook: runbooks/jumphost-provider-vip-gateway.md) -ORDERING TRAP (D-058): provider-vip's gateway 10.12.8.1 IS metal-admin's OLD -address. On the jumphost, in order: - (a) virbr2 (metal-admin) 10.12.8.1 -> 10.12.12.1 - (b) virbr7 (oob) confirm already 10.12.60.1 (live) - (c) THEN virbr1.104 (provider-vip) = 10.12.8.1 -Bringing up virbr1.104=.8.1 before (a) frees .8.1 is a same-subnet collision. -libvirt/netplan persistence is host-specific -- typed by the operator, not scripted. - -## Step 6 -- Deploy handoff -Proceed to phase-01 bundle deploy. Per-host interface carve -(`scripts/carve-host-interfaces.sh`) runs after commissioning. 
The bundle already -carries the D-058 VIP triples; `d057-bundle-check.py` PASSes against it. - -## Why teardown + jumphost are runbooks, not scripts -`juju destroy-model` and libvirt bridge edits are the most consequential and least -reversible / least portable actions in the phase. Per the operating discipline, -consequential mutations are human-gated; these stay operator-typed. The -deterministic, idempotent MAAS work (re-CIDR, standup) is scripted + behavior-tested. diff --git a/runbooks/phase-00-teardown-maas-reset.md b/runbooks/phase-00-teardown-maas-reset.md index b724724..3ef3e6a 100644 --- a/runbooks/phase-00-teardown-maas-reset.md +++ b/runbooks/phase-00-teardown-maas-reset.md @@ -1,150 +1,120 @@ -# Phase 00 -- Teardown + MAAS Reset +# Phase 00 -- Teardown + Pattern A Reset (D-060) -Destroy the `openstack` Juju model and reset the four MAAS hosts to a clean, -deploy-ready state: OSD secondary disks wiped, storage-class NICs linked, and the -MAAS VIP/FIP address carve in place. This is the rebuild-prep window -- it runs -BEFORE phase-01, because the VIP block must be MAAS-reserved before the bundle -deploys onto it, and `link-subnet` only works on a Ready (not Deployed) machine. +Destroy the `openstack` Juju model, delete the orphaned `capi-mgmt` MAAS machine, and reset +the four hosts (openstack0-3) to a clean, **Pattern A**, deploy-ready state on the EXISTING +**D-052/D-053** plane scheme. This is the rebuild-prep window -- it runs BEFORE phase-01. -Decisions: D-018 (skip graceful; MAAS-release-direct; supersedes D-013), D-017 -(full rebuild every cycle, nothing preserved), KI-P3-001 (the VIP carve fix). -Troubleshooting: appendix-A -- DOCFIX-016 (never `maas list` -- API-key leak), -DOCFIX-017 (no `maas whoami`; hardcode the eyeballed system_ids), R7 (sudo for -libvirt/qemu-img), KI-P3-001. 
+The deterministic, repeatable work is owned by SCRIPTS (each resolves every id live and is +dry-run by default); the destructive juju + libvirt steps stay human-gated here. Run from +jumphost `vopenstack-jesse` (user `jessea123`, sudo; also the libvirt hypervisor); MAAS CLI +logged in as profile `admin`. -!!! DESTRUCTIVE. Phase 1 (destroy-model + release) and Phase 2 (OSD wipe) are - irreversible. There is NO model-state rollback (DEVIATION-1): a KVM snapshot revert - cannot restore the destroyed Juju model -- the repo runbooks ARE the tested restore - path (D-017). Each destructive step is DISCRETE and individually gated -- do not batch. +Decisions: **D-060** (Pattern A revert; supersedes D-057/D-058), D-018 (MAAS-release-direct +teardown), D-017 (full rebuild every cycle, nothing preserved). The live MAAS is already on +D-052/D-053, so there is **NO re-CIDR** -- the carve re-establishes Pattern A interfaces on +the existing subnets. -CAPI-MGMT NOTE: this teardown releases the FOUR openstack hosts only. The MAAS -`capi-mgmt` VM is the RETIRED D-033 out-of-cloud node; the in-cloud `capi-mgmt-v2` -tenant VM (phase-06) replaces it. Leave `capi-mgmt` Ready (its separate Phase-7 -teardown is out of scope here). (The older 01-destroy-model.md released 5 VMs incl. -capi-mgmt -- that was the D-033 era; do NOT release it on the current rebuild.) +!!! DESTRUCTIVE. The model destroy + host release (Step 2) and the OSD wipe (Step 4) are + irreversible. There is NO model-state rollback (D-017): the repo runbooks ARE the tested + restore path. Each destructive step is DISCRETE and individually gated -- do not batch. + +CAPI-MGMT: the orphaned `capi-mgmt` MAAS machine (retired D-033 out-of-cloud node) is +**DELETED** by `scripts/phase-00-teardown.sh` -- it is no longer left Ready. The in-cloud +`capi-mgmt-v2` tenant VM (phase-06) dies with the model. --- -## Prerequisites -- (OPTIONAL) KVM snapshots of openstack0-3. 
NOTE (DEVIATION-1): snapshots do NOT give - model-state rollback -- destroy-model erases the Juju controller DB, so a disk revert - resurrects machines with no managing model + a stale MAAS view. The repo runbooks are - the restore path (D-017); snapshots are not required for this cycle. -- Authenticated juju session (`juju whoami`). MAAS CLI logged in as profile `admin`. -- Run from jumphost `vopenstack-jesse` (user `jessea123`, sudo; also the libvirt hypervisor). - -## Constants and env-literals -- MAAS profile: `admin` (DOCFIX-016: NEVER `maas list` -- it prints the API key). -- system_ids (hardcode; DOCFIX-017, no `maas whoami`): openstack0=`4na83t`, - openstack1=`qdbqd6`, openstack2=`h8frng`, openstack3=`tmsafc`. -- MAAS subnet ids: 1=provider 10.12.4.0/22, 2=metal 10.12.8.0/22, 6=data 10.12.12.0/22, - 7=storage 10.12.16.0/22, 8=replication 10.12.20.0/22. -- per-host storage NIC octet = 40 + index: data 10.12.12.4N, storage 10.12.16.4N, replication 10.12.20.4N. - -## Run-location legend -- `# RUN: jumphost` -- `juju` + `maas admin`; the jumphost is also the libvirt hypervisor (sudo). - ---- - -## Command-label convention -Every command block below is bracketed by bold labels, so a command line is never mistaken -for surrounding prose (these render in GitBucket and read clearly in a raw editor): -- **RUN -- LOC** -- the block CHANGES state; run it at LOC (e.g. `jumphost`, `vault/0`, `jumphost -> magnum/0`). -- **CHECK (read-only) -- LOC** -- a read-only verification; safe to re-run. -- **GATE:** -- a hard stop; do NOT proceed past the block unless the stated condition holds. -- **Expect:** -- what a passing result looks like. -- `> CAUTION:` -- marks a destructive, secret-handling, or irreversible step. - -## Phase 0 -- Pre-flight (READ-ONLY; run before teardown) - -**RUN -- jumphost** -```bash -( { - echo "=== 0a. 
five network spaces (hard blocker if absent) ===" - # DOCFIX-026: MAAS is authoritative for spaces (Juju imports them at add-model); use the - # model-independent query (same as Phase 5). Expect: metal 10.12.8.0/22 | provider 10.12.4.0/22 - # | data 10.12.12.0/22 | storage 10.12.16.0/22 | replication 10.12.20.0/22 (lbaas + undefined also appear). - maas admin spaces read | jq -r '.[] | "\(.name)\t\([.subnets[]?.cidr] | join(", "))"' - - echo "=== 0b. VIP ipranges (note the front-loaded ones to KEEP + the stale .224-.254 to remove) ===" - maas admin ipranges read \ - | jq -r '.[] | "id=\(.id)\ttype=\(.type)\t\(.start_ip)-\(.end_ip)\tsubnet=\(.subnet.cidr // "?")\t\(.comment // "")"' | sort - # KEEP: provider 10.12.4.2-.63, metal 10.12.8.2-.63 (bundle VIPs live here), provider FIP 10.12.5.0-10.12.7.254. - # STALE: metal 10.12.8.224-.254 (old scheme) -> its id feeds Phase 4 (this arc: id=2). - - echo "=== 0c. storage-class NIC link state on all four hosts (drives Phase 3) ===" - for SID in 4na83t qdbqd6 h8frng tmsafc; do echo " -- $SID --" - maas admin interfaces read "$SID" | jq -r '.[] | select(.name|test("^enp(8|9|10)s0$")) - | " \(.name)\tid=\(.id)\tlinks=\([.links[]?|{(.subnet.cidr):.ip_address}])"' - done # enp8s0(data) is the one KNOWN unlinked + a HARD deploy prereq; enp9s0/enp10s0 usually already linked -} ) +## Sequence (this phase) ``` - -**RUN -- jumphost** -```bash -# 0d. OSD-wipe pre-flight gate -- post-teardown these are "shut off"; vdb is root:root / 600. (R7: sudo) -for host in openstack0 openstack1 openstack2 openstack3; do - f="/var/lib/libvirt/images/${host}-1.qcow2" - printf '%-46s state=%s owner=%s mode=%s\n' "$f" \ - "$(sudo virsh -c qemu:///system domstate "$host" 2>/dev/null)" \ - "$(sudo stat -c '%U:%G' "$f" 2>/dev/null)" "$(sudo stat -c '%a' "$f" 2>/dev/null)" -done # expect (AFTER Phase 1 release): 4 lines, state=shut off, owner=root:root, mode=600. 
- # (Run PRE-teardown as a baseline: state=running, owner=libvirt-qemu:kvm -- correct live state.) +1. Pre-flight (read-only; baseline) +2. Teardown scripts/phase-00-teardown.sh --apply [DESTRUCTIVE: model + capi-mgmt] + -- hosts release to MAAS Ready, powered off -- +3. 8_lbaas net removal one-off jumphost op [hosts off] +4. OSD secondary wipe vdb -> blank 512G [DESTRUCTIVE; hosts off] +5. Pattern A re-carve scripts/carve-host-interfaces.sh --apply [hosts Ready] +6. Standup + bundle gate scripts/phase-00-maas-standup.sh ; provider-bundle-check.py [read-only] + -> EXIT GATE -> phase-01 deploy ``` +Steps 3-6 all run with the hosts Ready and powered off (the carve edits MAAS interface +metadata; 8_lbaas + OSD operate on libvirt). The phase-01 deploy powers the hosts on and +applies the carved netplan. -## Phase 1 -- Teardown (D-018) DISCRETE / DESTRUCTIVE - -**RUN -- jumphost** -```bash -# A. pre-destroy capture (reference only; NOT for restore) -TS=$(date -u +%Y%m%dT%H%M%SZ); BACKUP_DIR=$HOME/backups/pre-caracal-destroy-$TS; mkdir -p "$BACKUP_DIR" -juju export-bundle > "$BACKUP_DIR/bundle-pre-destroy.yaml" -juju status --format=yaml > "$BACKUP_DIR/juju-status-pre-destroy.yaml" -for f in "$BACKUP_DIR"/*.yaml; do [ -s "$f" ] || echo "WARNING: $f empty"; done -echo "$BACKUP_DIR" > "$HOME/.last-pre-caracal-destroy-backup"; ls -la "$BACKUP_DIR" -``` - -> CAUTION: destroys the entire `openstack` Juju model -- irreversible. The controller is -> untouched, but every app/unit/relation is reaped. Confirm you are on the testcloud, not Roosevelt. - -**RUN -- jumphost** -```bash -# B. destroy the openstack model (returns ~1-2 min; reaping ~5-10 min background). Controller untouched. -juju destroy-model openstack --force --no-wait --destroy-storage --no-prompt -``` - -> CAUTION: releases the four openstack hosts back to MAAS (erase + power off). Hardcoded -> system_ids (DOCFIX-017) -- does NOT touch the capi-mgmt host. - -**RUN -- jumphost** -```bash -# C. 
release the FOUR openstack hosts by system_id (DOCFIX-017: hardcoded ids, no whoami). NOT capi-mgmt. -for SID in 4na83t qdbqd6 h8frng tmsafc; do - echo "Releasing $SID..."; maas admin machine release "$SID" comment="Caracal rebuild teardown $TS" -done -``` +## Step 1 -- Pre-flight (READ-ONLY) **CHECK (read-only) -- jumphost** ```bash -# D. verify -juju models # expect: no 'openstack' (allow a few min) -maas admin machines read \ - | jq -r '.[] | select(.hostname|test("^openstack[0-3]$")) | "\(.hostname)\t\(.status_name)"' | sort - # expect four lines, each ending "Ready" -``` -**GATE:** `juju models` shows no `openstack`; openstack0-3 all Ready. (`link-subnet` is -REJECTED on a Deployed machine -- Phases 2-3 REQUIRE Ready.) If the model is still -`destroying` after ~10 min: `juju machines -m openstack --format=yaml`, then -`juju remove-machine -m openstack --force ` for each lingering id, then re-run the -destroy-model in B. +( { + echo "=== six D-052/D-053 spaces (hard blocker if absent) ===" + # expect: provider-public 10.12.4.0/22 | metal-admin 10.12.8.0/22 | metal-internal 10.12.12.0/22 + # | data-tenant 10.12.16.0/22 | storage 10.12.32.0/22 | replication 10.12.36.0/22 + maas admin spaces read | jq -r '.[] | "\(.name)\t\([.subnets[]?.cidr] | join(", "))"' | sort -## Phase 2 -- OSD secondary-disk wipe (clean-slate Ceph) DISCRETE / DESTRUCTIVE -Only after Phase 0d is GREEN (all "shut -off") AND explicit go. vda (the OS disk) is NOT touched -- MAAS reinstalls it on -deploy; only vdb (the OSD target) is recreated blank. 
+ echo "=== hosts + capi-mgmt status (baseline) ===" + maas admin machines read | jq -r '.[]|select(.hostname|test("^(openstack[0-3]|capi-mgmt)$"))|"\(.hostname)\t\(.status_name)"' | sort + + echo "=== OSD vdb baseline (pre-teardown: running, libvirt-qemu:kvm) ===" + for host in openstack0 openstack1 openstack2 openstack3; do + f="/var/lib/libvirt/images/${host}-1.qcow2" + printf ' %-46s state=%s owner=%s mode=%s\n' "$f" \ + "$(sudo virsh -c qemu:///system domstate "$host" 2>/dev/null)" \ + "$(sudo stat -c '%U:%G' "$f" 2>/dev/null)" "$(sudo stat -c '%a' "$f" 2>/dev/null)" + done +} ) +``` + +## Step 2 -- Teardown (D-018 / D-060) DISCRETE / DESTRUCTIVE + +`scripts/phase-00-teardown.sh` is the authority: it resolves the four host system_ids live +(no hardcoded ids), HARD-EXCLUDES the management substrate (juju, lxd, tailscale), destroys +the `openstack` model, and deletes the orphaned `capi-mgmt` machine. A pre-destroy juju +export/status capture runs first (reference only; NOT a restore path). + +**CHECK (read-only) -- jumphost** -- dry-run first (default; changes nothing) +```bash +scripts/phase-00-teardown.sh +``` +**Expect:** the four openstack hosts listed as release targets + `capi-mgmt` as the delete +target; PROTECTED juju / lxd / tailscale shown as excluded. Confirm the resolved system_ids +look right before applying. + +> CAUTION: destroys the entire `openstack` Juju model and DELETES the `capi-mgmt` MAAS +> machine -- irreversible. Confirm you are on the test cloud, not Roosevelt. + +**RUN -- jumphost** +```bash +scripts/phase-00-teardown.sh --apply +``` +**GATE:** `juju models` shows no `openstack`; `maas admin machines read` shows openstack0-3 +all `Ready` and `capi-mgmt` gone. (`link-subnet` is REJECTED on a Deployed machine -- the +carve in Step 5 REQUIRES Ready.) If the model is still `destroying` after ~10 min: +`juju remove-machine -m openstack --force <machine-id>` for each lingering id, then re-run --apply.
+ +## Step 3 -- Remove the idle 8_lbaas libvirt network (hosts off) one-off + +Each host still carries an idle `virtio` NIC on the isolated `8_lbaas` libvirt network +(bridge `virbr6`, no L3, ex-lbaas). MAAS has no `lbaas` space; the NIC is unused. Remove it +now while the hosts are shut off. This is a one-off jumphost op (Roosevelt is bare metal, no +libvirt nets) -- it is NOT part of any phase-00 script; log it to the as-executed log. + +> CAUTION: detaches a NIC from each host's persistent domain config and undefines a libvirt +> network. Reversible (XML backed up first); the detach uses `--config` only (no live change). + +Use the two gated blocks from the as-executed log / session notes -- do NOT improvise an +irreversible libvirt op: +- **Block 1:** back up the `8_lbaas` network XML to `~/8_lbaas-net.xml.bak`; pre-check every host `domstate = shut off` + (REFUSE otherwise); detach the idle NIC per host (`virsh detach-interface <host> network + --mac <mac> --config`); verify no domain still references 8_lbaas. +- **Block 2:** `virsh net-destroy 8_lbaas`; `virsh net-undefine 8_lbaas`; confirm gone. + +**GATE:** `sudo virsh net-list --all` shows no `8_lbaas`; no host domain references it. + +## Step 4 -- OSD secondary-disk wipe (clean-slate Ceph) DISCRETE / DESTRUCTIVE + +Only after Step 2 GATE is green (hosts Ready, shut off) AND explicit go. `vda` (the OS disk) +is NOT touched -- MAAS reinstalls it on deploy; only `vdb` (the OSD target) is recreated blank. > CAUTION: deletes and recreates each host's vdb OSD disk (512G blank) -- destroys all Ceph -> OSD data. vda (OS disk) is untouched. Run only after Phase 0d is GREEN and on explicit go. +> OSD data. vda is untouched. Hosts must be shut off (post-release). (R7: sudo for qemu-img.)
**RUN -- jumphost** ```bash @@ -155,7 +125,6 @@ sudo rm -f "$f" sudo qemu-img create -f qcow2 "$f" 512G sudo chown "$OWNER" "$f"; sudo chmod "$MODE" "$f" - sudo ls -lh "$f" done # verify for host in openstack0 openstack1 openstack2 openstack3; do @@ -164,138 +133,65 @@ ``` **GATE:** 4 files, ~200 KiB actual / 512 GiB virtual, root:root mode 600. -## Phase 3 -- Storage-class NIC links (idempotent; machines Ready) -Links every storage-class NIC to its space's subnet. enp8s0 (data) -is the one KNOWN unlinked and a HARD deploy prereq (nova-compute:neutron-plugin->data, -octavia:ovsdb-cms->data, chassis data bindings). enp9s0/enp10s0 back the C2 Ceph -public/cluster bindings; this links them too only if not already linked. +## Step 5 -- Pattern A interface re-carve (per host; machines Ready) -**RUN -- jumphost** +`scripts/carve-host-interfaces.sh` rebuilds each host's interface tree to **Pattern A** on +the EXISTING D-052/D-053 subnets: +- `enp1s0` -> OVS `br-ex` + STATIC `10.12.4.N` (provider-public) -- MAAS builds the OVS bridge; + ovn-chassis consumes it (bridge-interface-mappings + physnet1:br-ex), API containers attach. +- `enp7s0` -> `br-metal` (STATIC `10.12.8.N`) -> `br-metal.103` -> `br-internal` (STATIC `10.12.12.N`). +- `enp8s0` / `enp9s0` / `enp10s0` raw + STATIC on data `10.12.16.N` / storage `10.12.32.N` / + replication `10.12.36.N`. + +It resolves every id live, is idempotent, and requires Ready (interface edits are rejected on +Deployed). 
+ +**CHECK (read-only) -- jumphost** -- dry-run each host first (default) ```bash -declare -A NIC_CIDR=( [enp8s0]=10.12.12.0/22 [enp9s0]=10.12.16.0/22 [enp10s0]=10.12.20.0/22 ) -declare -A HOST_OCTET=( [4na83t]=40 [qdbqd6]=41 [h8frng]=42 [tmsafc]=43 ) -declare -A HN=( [4na83t]=openstack0 [qdbqd6]=openstack1 [h8frng]=openstack2 [tmsafc]=openstack3 ) - -for SID in 4na83t qdbqd6 h8frng tmsafc; do - echo "=== ${HN[$SID]} ($SID) ===" - IFJSON=$(maas admin interfaces read "$SID") - for NIC in enp8s0 enp9s0 enp10s0; do - cidr="${NIC_CIDR[$NIC]}"; prefix="${cidr%.0/22}"; ip="${prefix}.${HOST_OCTET[$SID]}" - ifid=$(echo "$IFJSON" | jq -r --arg n "$NIC" '.[]|select(.name==$n)|.id') - if [ -z "$ifid" ]; then echo " $NIC: NOT FOUND -- inspect 'maas admin interfaces read $SID'"; continue; fi - linked=$(echo "$IFJSON" | jq -r --arg c "$cidr" --argjson id "$ifid" \ - '[.[]|select(.id==$id).links[]?|select(.subnet.cidr==$c)]|length') - if [ "$linked" != "0" ]; then echo " $NIC id=$ifid already on $cidr -- SKIP"; continue; fi - subid=$(maas admin subnets read | jq -r --arg c "$cidr" '.[]|select(.cidr==$c)|.id') - echo " $NIC id=$ifid -> $ip (subnet id=$subid, $cidr)" - maas admin interface link-subnet "$SID" "$ifid" mode=STATIC subnet="$subid" ip_address="$ip" - done -done - -# verify -- every host should now show data/storage/replication links -for SID in 4na83t qdbqd6 h8frng tmsafc; do - echo "=== ${HN[$SID]} ($SID) ===" - maas admin interfaces read "$SID" \ - | jq -r '.[] | select(.name|test("^enp(8|9|10)s0$")) | " \(.name)\t\([.links[]?|{(.subnet.cidr):.ip_address}])"' -done +for h in openstack0 openstack1 openstack2 openstack3; do scripts/carve-host-interfaces.sh "$h"; done ``` -**GATE:** each host's enp8s0/enp9s0/enp10s0 shows a 10.12.{12,16,20}.4N STATIC link. +**Expect:** each plan ends `Summary: 0 fatal`; the provider plane shows +`create br-ex (OVS) parent=enp1s0` and `br-ex -> STATIC 10.12.4.N`; metal / internal / data / +storage / replication statics as above. 
No `br-prov-api`, no `enp1s0.104`, no provider-vip. -## Phase 4 -- MAAS VIP/FIP address carve (mutation; confirm-first) -The bundle's VIPs live in the front-loaded /26 blocks; the FIP -pool (phase-04) lives at 10.12.5.0-10.12.7.254. These MAAS reservations persist -across teardown, so on a repeat rebuild they usually already exist -- verify, create -only if absent, and delete the stale old-scheme reservation. (KI-P3-001: a reserved -range stops MAAS auto-static landing a primary on a configured VIP.) +> CAUTION: mutates MAAS interface definitions on each host. Re-runnable (idempotent), but +> apply ONE host at a time and re-read the resulting tree. -**CHECK (read-only) -- jumphost** +**RUN -- jumphost** (per host) ```bash -# 4a. verify current state -maas admin ipranges read | jq -r '.[] | "id=\(.id)\t\(.type)\t\(.start_ip)-\(.end_ip)\tsubnet=\(.subnet.cidr // "?")\t\(.comment // "")"' | sort -# want present: provider .4.2-.63 (subnet 1), metal .8.2-.63 (subnet 2), provider FIP .5.0-.7.254. -# want absent : metal .8.224-.254 (stale). +scripts/carve-host-interfaces.sh openstack0 --apply +# then openstack1, openstack2, openstack3 (one at a time) ``` +**GATE:** each host shows `br-ex` (type ovs) STATIC `10.12.4.N`; `br-metal` `10.12.8.N`; +`br-internal` `10.12.12.N`; `enp8s0`/`enp9s0`/`enp10s0` STATIC on `10.12.16/32/36.N`. -**RUN -- jumphost** +## Step 6 -- Standup + bundle gate (READ-ONLY; before deploy) + +**CHECK (read-only) -- jumphost** -- MAAS topology ```bash -# 4b. 
create the front-loaded VIP reservations ONLY if absent (idempotent; carve doc section 8) -( { - RANGES="$(maas admin ipranges read)" - [ -n "$RANGES" ] || { echo "ipranges read failed/empty -- ABORT (do not create blind)"; exit 1; } - # provider VIPs 10.12.4.2-.63 (subnet 1) - if printf '%s' "$RANGES" | jq -e '.[]|select(.start_ip=="10.12.4.2" and .end_ip=="10.12.4.63")' >/dev/null; then - echo "provider .4.2-.63 present -- SKIP" - else - maas admin ipranges create type=reserved subnet=1 start_ip=10.12.4.2 end_ip=10.12.4.63 \ - comment="OpenStack public API HA VIPs (front-loaded /26; supersedes .224-.236)" - fi - # metal VIPs 10.12.8.2-.63 (subnet 2) - if printf '%s' "$RANGES" | jq -e '.[]|select(.start_ip=="10.12.8.2" and .end_ip=="10.12.8.63")' >/dev/null; then - echo "metal .8.2-.63 present -- SKIP" - else - maas admin ipranges create type=reserved subnet=2 start_ip=10.12.8.2 end_ip=10.12.8.63 \ - comment="OpenStack internal/admin API HA VIPs (front-loaded /26; supersedes D-020 .224-.254)" - fi -} ) +scripts/phase-00-maas-standup.sh ``` +**Expect:** `no drift` and `OK (dryrun) -- topology consistent with D-052/D-053`. Any DRIFT +line is a hard stop (do not deploy onto a mis-bound plane). -**RUN -- jumphost** +**CHECK (read-only) -- jumphost** -- bundle invariants ```bash -# 4c. delete the stale .224-.254 metal reservation -- CONFIRM the id from 4a first (this arc: id=2) -# maas admin iprange delete +python3 scripts/provider-bundle-check.py bundle.yaml ``` -**GATE:** `ipranges read` shows provider FIP + provider VIPs .4.2-.63 + metal VIPs -.8.2-.63; the metal .8.224-.254 reservation is gone; the metal DHCP dynamic -(10.12.9.0-10.12.11.254) is unchanged. 
- -## Phase 5 -- Post-prep verification (READ-ONLY gate before deploy) - -**CHECK (read-only) -- jumphost** -```bash -( { - maas admin spaces read | jq -r '.[] | "\(.name)\t\([.subnets[]?.cidr] | join(", "))"' # DOCFIX-026: 5 spaces (juju spaces FAILS here -- model gone post-teardown) - maas admin machines read | jq -r '.[]|select(.hostname|test("^openstack[0-3]$"))|"\(.hostname)\t\(.status_name)"' | sort # all Ready - for SID in 4na83t qdbqd6 h8frng tmsafc; do echo "-- $SID --" - maas admin interfaces read "$SID" | jq -r '.[]|select(.name|test("^enp(8|9|10)s0$"))|" \(.name)\t\([.links[]?|{(.subnet.cidr):.ip_address}])"' - done # data/storage/replication links on all four - for host in openstack0 openstack1 openstack2 openstack3; do - sudo qemu-img info "/var/lib/libvirt/images/${host}-1.qcow2" | grep -E 'virtual size|disk size' - done # OSD 512G blank -} ) -``` +**Expect:** `PASS` -- 11 charms `public->provider-public`, `.4/.8/.12` VIP triples, all 4 +chassis MACs present (incl openstack0). --- ## EXIT GATE (phase-00 complete) -- `juju models` shows no `openstack`; openstack0-3 all Ready. +- `juju models` shows no `openstack`; openstack0-3 all Ready; `capi-mgmt` DELETED. +- `8_lbaas` libvirt network gone; no host domain references it. - OSD vdb files 512 GiB blank (root:root, 600) on all four hosts. -- enp8s0/enp9s0/enp10s0 linked (10.12.{12,16,20}.4N STATIC) on all four. -- MAAS carve: front-loaded VIP /26 reserved on provider + metal; FIP pool reserved; - stale .224-.254 gone. -- Clean slate ready for phase-01 (deploy). NOTE: the deploy uses ONE overlay - (octavia-pki only) -- NOT the vr0-dc0-testcloud overlay (R10; that overlay's intent - is folded into the hardened base bundle). - -## As-built reference (rebuild-prep arc -- audit trail) -- Teardown D-018: `juju destroy-model openstack --force --no-wait --destroy-storage - --no-prompt`; release the four hosts by system_id (capi-mgmt left Ready). 
-- OSD wipe proven 2026-05-22, re-run 2026-05-30: 512G blank, root:root, 600. -- NIC links: enp8s0 found UNLINKED this arc (the hard prereq); enp9s0/enp10s0 already - linked. Reference enp8s0 ids (arc): openstack1=26, openstack2=32, openstack3=38; - openstack0 resolved dynamically (the block does not depend on these). -- MAAS carve: front-loaded .2-.63 reservations created earlier and persistent; stale - metal .224-.254 was iprange id=2 (deleted after confirmation). -- DEVIATION-2 (2026-06-11): hypervisor 196 GB; openstack0-3 each 16384 -> 32768 MiB - (virsh setmaxmem/setmem --config while shut off, post-OSD-wipe), then MAAS recommission - with `skip_networking=1 skip_storage=1 testing_scripts=none` -- refreshes hardware - inventory WITHOUT losing interface links/storage layout (all 12 storage links preserved; - 4x Ready at 32768 in ~3 min). D-040 reserved-host-memory 8192 retained (correctness floor, - not a function of total RAM). Per-host footprint for Roosevelt rebalancing is measured at - the 32 GiB envelope (16 GiB-era pressure numbers do not map 1:1). [recommission pattern -> appendix-A] -- DEVIATION-3 (2026-06-11): the destroy-model released Juju machine 4 (the retired D-033 - out-of-cloud capi-mgmt MAAS node) as a side effect; MAAS shows capi-mgmt = Ready (landed - Ready, not re-released by the Phase 1C loop, which targeted only the four system_ids). - The separate "Phase 7 teardown of old MAAS capi-mgmt node" queue item is thereby closed. +- Pattern A interfaces on all four: `br-ex` (OVS) STATIC `.4.N`; `br-metal` `.8.N`; + `br-internal` `.12.N`; data / storage / replication `.16/.32/.36.N`. +- `phase-00-maas-standup.sh` reports no drift; `provider-bundle-check.py` PASSes. +- Clean slate ready for phase-01. The deploy uses ONE overlay (octavia-pki) -- NOT the + vr0-dc0-testcloud overlay (its intent is folded into the hardened base bundle). ## Next phase-01 -- bundle deploy. 
diff --git a/runbooks/provider-vip-maas-standup.md b/runbooks/provider-vip-maas-standup.md deleted file mode 100644 index a02d626..0000000 --- a/runbooks/provider-vip-maas-standup.md +++ /dev/null @@ -1,192 +0,0 @@ -# provider-vip MAAS stand-up (D-057) - -> NOTE (end-of-deployment review -- R1, see D-057-REVIEW-ITEMS.md): the PHASE 2 -> create blocks below are now SUPERSEDED by the tested, idempotent, dry-run-default -> `scripts/provider-vip-standup.sh`, which is the preferred execution path. They are -> retained here (not trimmed) for reference and as a manual fallback. PHASE 1 (audit), -> the virbr1 vlan_filtering gate, and the deferred jumphost-gateway reads remain -> uniquely useful. Reconcile (trim-to-pointer or annotate) in the post-D-011 sweep. - -Builds the provider-vip plane in MAAS so the carve and bundle have something to -resolve against. New plane: space `provider-vip`, VLAN VID 104 on the **provider -fabric** (the fabric that owns 10.12.4.0/22), subnet 10.12.8.0/22, reserved VIP -band .24.2-.100. - -RUN ON: the jumphost (`ssh jessea123@10.17.11.246`), MAAS admin profile already -logged in (`maas admin ...`). All values are resolved live by name/CIDR -- no -hardcoded MAAS ids (PATTERN-1). - -PRINCIPLE: PHASE 1 is read-only -- run it and report the output back BEFORE any -create. The audit is designed to surface anything that would change the carve or -bundle (VID 104 already taken, unexpected provider fabric, vlan_filtering=1 on -virbr1, metal-internal mtu/dns differing from assumptions). Do not run PHASE 2 -until PHASE 1 is reviewed. - -SCOPE NOTE: this stands up the MAAS side only. The jumphost L3 gateway -(virbr1.104 = 10.12.8.1) that makes .4<->.8 routing real is a separate host -step (libvirt/netplan, persistence-method TBD from a live read) -- deferred to -its own block, and NOT required for the MAAS plane to be created or for the carve -to resolve. It is required before D-011 #3 (tenant -> API reachability). 
- -================================================================================ -## PHASE 1 -- AUDIT (read-only; run, then PASTE OUTPUT BACK) -================================================================================ - ---- BEGIN runbook block: pvip-01-audit (RUN ON jumphost) --- -echo "=== A1: provider fabric (owns 10.12.4.0/22) -- VID 104 must live HERE ===" -maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22") - |{cidr, vid:.vlan.vid, fabric:.vlan.fabric, fabric_id:.vlan.fabric_id, - space, gateway_ip, dns_servers}' - -echo "=== A2: metal-internal VID 103 -- the TEMPLATE to mirror (mtu/managed/dns/dhcp) ===" -maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.16.0/22") - |{cidr, vid:.vlan.vid, fabric:.vlan.fabric, vlan_mtu:.vlan.mtu, - vlan_dhcp_on:.vlan.dhcp_on, space, managed, gateway_ip, dns_servers, - allow_dns, allow_proxy, rdns_mode}' - -echo "=== A3: VID 104 collision check -- expect EMPTY on every fabric ===" -maas admin subnets read | jq -r '[.[].vlan|{vid,fabric,id}]|unique_by(.id)[]|select(.vid==104)' -PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') -echo " provider fabric_id = $PROV_FAB ; VLANs already on it:" -maas admin vlans read "$PROV_FAB" | jq -r '.[]|{vid,name,id,space}' - -echo "=== A4: provider-vip must NOT already exist -- expect EMPTY for both ===" -maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.cidr' -maas admin spaces read | jq -r '.[]|select(.name=="provider-vip")|.name' - -echo "=== A5: provider-public reserved ranges -- mirror this pattern for .8 ===" -maas admin ipranges read | jq -r '.[]|select(.start_ip|startswith("10.12.4.")) - |{type, start_ip, end_ip, comment}' - -echo "=== A6: GATE -- virbr1 must pass tagged frames (VID 104). MUST print 0 ===" -cat /sys/class/net/virbr1/bridge/vlan_filtering 2>/dev/null \ - || echo "WARN: virbr1 has no bridge/vlan_filtering node (not a bridge?) 
-- investigate" - -echo "=== A7: jumphost must not already carry .8 -- expect 'clean' ===" -ip -br addr show | grep -E 'virbr1\.104|10\.12\.8\.' || echo "clean: no .8 on jumphost yet" ---- END runbook block: pvip-01-audit --- - -STOP. Report A1-A7. Expected / change-triggers: -- A1: provider fabric_id is the home for VID 104. Note its value; C2/C3 use it. -- A2: gives mtu (almost certainly 1500) + dns_servers + managed for provider-vip - to mirror. If mtu != 1500, C2 must match it. If dns_servers differ from a prior - assumption, C4b uses the value read here -- not a guessed 10.12.12.1. -- A3: must be EMPTY. If VID 104 is already in use, STOP -- pick another VID and - update lib-net.sh PROVIDER_VIP_VID + the carve assert + this runbook in lockstep. -- A4: both EMPTY. If either exists, a prior partial run happened -- reconcile, do - not blind-create. -- A6: MUST be 0. If 1, STOP -- VID 104 will not traverse virbr1 and the whole - tagged-secondary approach needs rework (per-port VLAN membership, or a different - bridge). This is the make-or-break gate; flag it loudly. - -================================================================================ -## PHASE 2 -- CREATE (only after PHASE 1 reviewed; run ONE block at a time) -================================================================================ - -Re-resolve the helpers at the top of every shell you run these in (they are not -persisted between SSH sessions): - - PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') - MTU_PROV=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.mtu') # VID 104 parent = provider fabric - DNS103=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.16.0/22")|.dns_servers|join(",")') - -GATE C1 -- create the space. 
---- BEGIN runbook block: pvip-02-space --- -maas admin spaces create name=provider-vip -maas admin spaces read | jq -r '.[]|select(.name=="provider-vip")|{name,id}' ---- END runbook block: pvip-02-space --- - -GATE C2 -- create VLAN 104 on the PROVIDER fabric, mtu mirroring the PROVIDER untagged VLAN -(VID 104 is a child of enp1s0 on the provider fabric -- its MTU must track that parent, -NOT metal-internal which lives on a different fabric). -(Confirm flags first if unsure: `maas admin vlans create --help`.) ---- BEGIN runbook block: pvip-03-vlan --- -PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') -MTU_PROV=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.mtu') # provider parent, not metal-internal -maas admin vlans create "$PROV_FAB" name=provider-vip vid=104 mtu="$MTU_PROV" -maas admin vlans read "$PROV_FAB" | jq -r '.[]|select(.vid==104)|{vid,name,id,fabric_id,mtu,space}' ---- END runbook block: pvip-03-vlan --- - -GATE C3 -- assign the VID-104 VLAN to the provider-vip space. -(If `space=` is rejected, retry with `space=provider-vip`.) ---- BEGIN runbook block: pvip-04-assign-space --- -PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') -SPACE_ID=$(maas admin spaces read | jq -r '.[]|select(.name=="provider-vip")|.id') -maas admin vlan update "$PROV_FAB" 104 space="$SPACE_ID" -maas admin vlans read "$PROV_FAB" | jq -r '.[]|select(.vid==104)|{vid,id,space}' ---- END runbook block: pvip-04-assign-space --- - -GATE C4 -- create the subnet on the VID-104 VLAN, then set gateway/dns/managed. -Split into create (minimal) + update (confirmed `subnet update` form) to avoid -guessing which flags `subnets create` accepts. 
---- BEGIN runbook block: pvip-05-subnet --- -PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') -VID104_VLANID=$(maas admin vlans read "$PROV_FAB" | jq -r '.[]|select(.vid==104)|.id') -DNS103=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.16.0/22")|.dns_servers|join(",")') - -# 5a: create (minimal -- cidr + vlan) -maas admin subnets create cidr=10.12.8.0/22 vlan="$VID104_VLANID" - -# 5b: routed-plane gateway (D-057) + managed; dns mirrors VID 103 if set -maas admin subnet update 10.12.8.0/22 gateway_ip=10.12.8.1 managed=true -[ -n "$DNS103" ] && maas admin subnet update 10.12.8.0/22 dns_servers="$DNS103" - -maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.8.0/22") - |{cidr, vid:.vlan.vid, fabric:.vlan.fabric, space, managed, gateway_ip, dns_servers}' ---- END runbook block: pvip-05-subnet --- - -GATE C5 -- reserved VIP band .24.2-.100 (VIPs .50-.60 live in it; mirrors .4.2-.100). ---- BEGIN runbook block: pvip-06-range --- -SUB24=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.id') -maas admin ipranges create type=reserved subnet="$SUB24" \ - start_ip=10.12.8.2 end_ip=10.12.8.100 comment="provider-vip API VIP band (D-057)" -maas admin ipranges read | jq -r '.[]|select(.start_ip|startswith("10.12.8.")) - |{type, start_ip, end_ip, comment}' ---- END runbook block: pvip-06-range --- - -================================================================================ -## PHASE 3 -- VERIFY (read-only) -- proves the carve will resolve + Juju sees it -================================================================================ - ---- BEGIN runbook block: pvip-07-verify --- -echo "=== carve resolvers, simulated against live MAAS (must match the script) ===" -SUB=$(maas admin subnets read) -echo "subid_of 10.12.8.0/22 = $(echo "$SUB" | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.id') (expect non-empty)" -echo "vlanid_of 10.12.8.0/22 = $(echo "$SUB" | jq -r 
'.[]|select(.cidr=="10.12.8.0/22")|(.vlan.id // .vlan)') (the VLAN obj id)" -echo "vlanvid_of 10.12.8.0/22 = $(echo "$SUB" | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.vlan.vid') (MUST be 104)" -echo "space = $(echo "$SUB" | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.space') (MUST be provider-vip)" - -echo "=== Juju visibility (real consumption is at redeploy; this pre-validates) ===" -juju reload-spaces -juju spaces | grep -E 'provider-vip|provider-public' || echo "WARN: provider-vip not visible to Juju" ---- END runbook block: pvip-07-verify --- - -PASS CRITERIA: -- vlanvid_of == 104, space == provider-vip, subid_of non-empty. -- `juju spaces` lists provider-vip. -At this point the carve's PHASE-1 asserts (`no MAAS subnet for 10.12.8.0/22`, -`provider-vip ... expected 104`) will pass, and the files are safe to drop in. - -================================================================================ -## DEFERRED (separate step; needs a live read first) -- jumphost L3 gateway -================================================================================ - -virbr1.104 = 10.12.8.1 on the jumphost makes .4<->.8 routing real (ip_forward -is already on). NOT needed for the MAAS plane or the carve; needed before D-011 -#3. Before writing it I want a live read of how the jumphost defines virbr1 so -the persistence method is correct (libvirt network XML vs netplan vs a systemd -unit -- virbr1 is libvirt-managed, so a naive netplan vlan-on-virbr1 may race -libvirt at boot): - - ip -d link show virbr1 - virsh net-dumpxml 1_provider 2>/dev/null | sed -n '1,40p' - ls /etc/netplan/ ; sudo grep -RnE 'virbr1|10\.12\.4\.1' /etc/netplan/ 2>/dev/null - -Report that and I will write the gated gateway block (with a clean rollback). 
- -POST-DEPLOY WATCH-ITEM (gateway_ip safeguard): after redeploy, confirm every API -container's DEFAULT route is still via metal-admin 10.12.12.1, NOT 10.12.8.1: - juju exec --all -- ip route show default -If any unit defaults via .24.1, drop the subnet gateway_ip (set to "") or pin the -node default gateway to the metal-admin subnet; provider-vip reachability does -not depend on its own gateway in v1. diff --git a/scripts/carve-host-interfaces.sh b/scripts/carve-host-interfaces.sh index b5bcba3..b0db8fc 100644 --- a/scripts/carve-host-interfaces.sh +++ b/scripts/carve-host-interfaces.sh @@ -1,23 +1,24 @@ #!/usr/bin/env bash # scripts/carve-host-interfaces.sh [--apply] # -# Strategy-B interface carve for ONE freshly-commissioned host. Reconstructs the +# Pattern-A interface carve for ONE freshly-commissioned host. Reconstructs the # host network tree that was lost when the machine was decomposed. Default is # DRY-RUN (resolves every id live and prints each mutation it WOULD run, changes # nothing). Pass --apply to execute. # # Target tree (octet N = .40-.43 by host index; see lib-hosts.sh HOST_OCTET): -# enp1s0 raw, NO L3, UNTAGGED 1_provider (D-057) -- ovn-chassis MAC-enslaves it -# into OVS br-ex at deploy; the uplink carries NO -# host static (a static here forced a Linux bridge -# that starved br-ex). MAAS must leave enp1s0 RAW. -# enp1s0.104 --> br-prov-api (standard bridge) + STATIC 10.12.8.N (provider-vip; D-057) -# tagged secondary; public API VIP plane + container -# 'public' attach. Mirrors the metal-internal stack. -# enp7s0 --> br-metal (standard bridge) + STATIC 10.12.12.N (metal-admin) +# enp1s0 --> br-ex (OVS bridge) + STATIC 10.12.4.N (provider-public; Pattern A) +# MAAS builds the OVS br-ex on enp1s0 and the host +# static lands on br-ex. 
enp1s0 becomes an L2-only +# OVS member; ovn-chassis consumes br-ex via +# bridge-interface-mappings (MAC) + ovn-bridge- +# mappings physnet1:br-ex -- it does NOT build its +# own bridge, and the API LXD containers' 'public' +# endpoint attaches to this same OVS br-ex. +# enp7s0 --> br-metal (standard bridge) + STATIC 10.12.8.N (metal-admin) # br-metal.103 (vlan, VID 103) -# --> br-internal (standard bridge) + STATIC 10.12.16.N (metal-internal) -# enp8s0 raw + STATIC 10.12.20.N (data-tenant) +# --> br-internal (standard bridge) + STATIC 10.12.12.N (metal-internal) +# enp8s0 raw + STATIC 10.12.16.N (data-tenant) # enp9s0 raw + STATIC 10.12.32.N (storage; Juju auto-bridges at deploy) # enp10s0 raw + STATIC 10.12.36.N (replication; Juju auto-bridges at deploy) # enp11s0 idle (ex-lbaas; no link) @@ -71,20 +72,17 @@ vlanvid_of(){ printf '%s' "$SUBNETS_JSON" | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.vlan.vid // empty)' | head -1; } # plane CIDRs (verified set; sourced order from lib-net PLANE_CIDRS) -C_PROV="10.12.4.0/22"; C_METAL="10.12.12.0/22"; C_INT="$METAL_INTERNAL_CIDR" # 10.12.16.0/22 -C_PVIP="$PROVIDER_VIP_CIDR" # 10.12.8.0/22 (D-057 provider-vip; tagged VID 104) -C_DATA="10.12.20.0/22"; C_STOR="10.12.32.0/22"; C_REPL="10.12.36.0/22" +C_PROV="10.12.4.0/22"; C_METAL="10.12.8.0/22"; C_INT="$METAL_INTERNAL_CIDR" # 10.12.12.0/22 +C_DATA="10.12.16.0/22"; C_STOR="10.12.32.0/22"; C_REPL="10.12.36.0/22" -# assert all planes resolve, and the tagged planes are really VID 103 / 104 -for c in "$C_PROV" "$C_METAL" "$C_INT" "$C_PVIP" "$C_DATA" "$C_STOR" "$C_REPL"; do +# assert all planes resolve, and the tagged plane is really VID 103 +for c in "$C_PROV" "$C_METAL" "$C_INT" "$C_DATA" "$C_STOR" "$C_REPL"; do [ -n "$(subid_of "$c")" ] || { fail "no MAAS subnet for $c"; } [ -n "$(vlanid_of "$c")" ] || { fail "no VLAN for $c"; } done [ "$FATAL" = 0 ] || exit 1 gotvid="$(vlanvid_of "$C_INT")" [ "$gotvid" = "$METAL_INTERNAL_VID" ] || { fail "metal-internal $C_INT is VID 
'$gotvid', expected $METAL_INTERNAL_VID"; exit 1; } -gotpvipvid="$(vlanvid_of "$C_PVIP")" -[ "$gotpvipvid" = "$PROVIDER_VIP_VID" ] || { fail "provider-vip $C_PVIP is VID '$gotpvipvid', expected $PROVIDER_VIP_VID"; exit 1; } # interface id by name (live) ifid_of() { maas_q interfaces read "$SID" | jq -r --arg n "$1" '.[]|select(.name==$n)|.id' | head -1; } @@ -148,7 +146,6 @@ printf " provider %s sub=%s vlan=%s\n" "$C_PROV" "$(subid_of "$C_PROV")" "$(vlanid_of "$C_PROV")" printf " metal %s sub=%s vlan=%s\n" "$C_METAL" "$(subid_of "$C_METAL")" "$(vlanid_of "$C_METAL")" printf " internal %s sub=%s vlan=%s (vid %s)\n" "$C_INT" "$(subid_of "$C_INT")" "$(vlanid_of "$C_INT")" "$gotvid" -printf " prov-vip %s sub=%s vlan=%s (vid %s)\n" "$C_PVIP" "$(subid_of "$C_PVIP")" "$(vlanid_of "$C_PVIP")" "$gotpvipvid" printf " data %s sub=%s vlan=%s\n" "$C_DATA" "$(subid_of "$C_DATA")" "$(vlanid_of "$C_DATA")" printf " storage %s sub=%s vlan=%s\n" "$C_STOR" "$(subid_of "$C_STOR")" "$(vlanid_of "$C_STOR")" printf " replicat %s sub=%s vlan=%s\n" "$C_REPL" "$(subid_of "$C_REPL")" "$(vlanid_of "$C_REPL")" @@ -165,53 +162,40 @@ emit "$nic(id=$id) -> STATIC $ip on subnet $sub" interface link-subnet "$SID" "$id" mode=STATIC subnet="$sub" ip_address="$ip" } -# D-057 helper: make a NIC raw + L3-LESS on its plane's UNTAGGED vlan (no subnet link), -# so ovn-chassis can MAC-enslave it into OVS br-ex at deploy. Idempotent. -ensure_raw_unlinked() { - local nic="$1" cidr="$2" id provvlan curvlan lid +# Pattern A helper: build the MAAS-managed OVS bridge br-ex on the provider uplink and put +# the host static on br-ex. enp1s0 becomes an L2-only OVS member; ovn-chassis then consumes +# br-ex (bridge-interface-mappings MAC + ovn-bridge-mappings physnet1:br-ex) -- it does NOT +# build its own bridge, and the API LXD containers' 'public' endpoint shares this same OVS +# br-ex. 
(Reverts the D-057 raw-uplink Pattern B, which let Juju build a competing Linux +# bridge br-enp1s0 that captured enp1s0 and starved OVS br-ex, darkening the floating-IP +# plane.) Idempotent. +build_ovs_brex() { + local nic="$1" cidr="$2" ip="$3" id lid brid id="$(ifid_of "$nic")"; [ -n "$id" ] || { fail "$nic not found on $HN"; return 1; } - provvlan="$(vlanid_of "$cidr")" - # unlink any subnet link(s) first so the uplink carries no L3 + # 1) clear the commissioning link(s) so the host L3 lands on br-ex, not the member NIC for lid in $(maas_q interfaces read "$SID" | jq -r --arg n "$nic" '.[]|select(.name==$n)|.links[]?|select(.subnet!=null)|.id'); do - emit "unlink $nic(id=$id) link id=$lid (L3-less for OVS br-ex)" interface unlink-subnet "$SID" "$id" id="$lid" + emit "unlink $nic(id=$id) commissioning link id=$lid (L3 moves to br-ex)" interface unlink-subnet "$SID" "$id" id="$lid" done - # ensure it sits on the UNTAGGED vlan of $cidr -- only if not already there (idempotent) - curvlan="$(maas_q interfaces read "$SID" | jq -r --arg n "$nic" '.[]|select(.name==$n)|(.vlan.id // .vlan // empty)' | head -1)" - if [ "$curvlan" != "$provvlan" ]; then - emit "$nic(id=$id) -> VLAN $provvlan ($cidr untagged, no L3)" interface update "$SID" "$id" vlan="$provvlan" - else - note "$nic already on VLAN $provvlan -- SKIP vlan set" - fi + # 2) create the OVS bridge br-ex on the provider uplink (skip if it already exists) + if [ -z "$(ifid_of br-ex)" ]; then + emit "create br-ex (OVS) parent=$nic(id=$id)" interfaces create-bridge "$SID" name=br-ex bridge_type=ovs parent="$id" + else note "br-ex exists -- SKIP create"; fi + [ "$MODE" = apply ] && brid="$(ifid_of br-ex)" || brid="" + # 3) host static on br-ex (provider-public presence) + if ! 
linked_to br-ex "$cidr"; then + emit "br-ex(id=$brid) -> STATIC $ip on subnet $(subid_of "$cidr")" \ + interface link-subnet "$SID" "$brid" mode=STATIC subnet="$(subid_of "$cidr")" ip_address="$ip" + else note "br-ex already on $cidr -- SKIP"; fi } -hdr "provider plane (enp1s0 RAW + L3-LESS -- OVS br-ex uplink; D-057)" -# D-057: the OVS provider uplink must carry NO L3. ovn-chassis MAC-enslaves enp1s0 into -# br-ex at deploy; the old host static 10.12.4.N is REMOVED here (it forced a Linux bridge -# br-enp1s0 that captured enp1s0 and starved br-ex). Leave enp1s0 raw + unlinked. -ensure_raw_unlinked enp1s0 "$C_PROV" - -hdr "provider-vip plane (enp1s0.$PROVIDER_VIP_VID -> br-prov-api -> static; D-057)" -# Tagged secondary on the provider NIC, mirroring metal-internal (br-metal.103 -> br-internal). -# The bundle binds the API charms' 'public' endpoint to the provider-vip space, so containers -# attach HERE (not the untagged uplink). The host provider-plane static MOVES here from enp1s0. -PEID="$(ifid_of enp1s0)"; [ -n "$PEID" ] || fail "enp1s0 not found" -# 1) enp1s0.104 (VLAN, VID 104) on enp1s0 -if [ -z "$(ifid_of "enp1s0.$PROVIDER_VIP_VID")" ]; then - emit "create enp1s0.$PROVIDER_VIP_VID (VID $PROVIDER_VIP_VID, vlan obj $(vlanid_of "$C_PVIP")) parent=enp1s0(id=$PEID)" \ - interfaces create-vlan "$SID" vlan="$(vlanid_of "$C_PVIP")" parent="$PEID" -else note "enp1s0.$PROVIDER_VIP_VID exists -- SKIP create"; fi -[ "$MODE" = apply ] && PVID="$(ifid_of "enp1s0.$PROVIDER_VIP_VID")" || PVID="" -# 2) br-prov-api (standard) on enp1s0.104 -if [ -z "$(ifid_of br-prov-api)" ]; then - emit "create br-prov-api (standard) parent=enp1s0.$PROVIDER_VIP_VID(id=$PVID)" \ - interfaces create-bridge "$SID" name=br-prov-api bridge_type=standard parent="$PVID" -else note "br-prov-api exists -- SKIP create"; fi -[ "$MODE" = apply ] && BPID="$(ifid_of br-prov-api)" || BPID="" -# 3) STATIC on br-prov-api (host provider-plane presence; OVS-free so no br-ex conflict) -if ! 
linked_to br-prov-api "$C_PVIP"; then - emit "br-prov-api(id=$BPID) -> STATIC 10.12.8.$OCTET on subnet $(subid_of "$C_PVIP")" \ - interface link-subnet "$SID" "$BPID" mode=STATIC subnet="$(subid_of "$C_PVIP")" ip_address="10.12.8.$OCTET" -else note "br-prov-api already on $C_PVIP -- SKIP"; fi +hdr "provider plane (enp1s0 -> OVS br-ex + STATIC; Pattern A)" +# Pattern A: MAAS builds the OVS bridge br-ex on enp1s0; the host static lands on br-ex and +# enp1s0 becomes an L2-only OVS member. ovn-chassis consumes br-ex at deploy via +# bridge-interface-mappings (MAC) + ovn-bridge-mappings physnet1:br-ex -- it does NOT build +# its own bridge, and the API LXD containers' 'public' endpoint attaches to this same OVS +# br-ex. (Reverts D-057's raw-uplink Pattern B, which let Juju build a competing Linux bridge +# br-enp1s0 that captured enp1s0 and starved OVS br-ex, darkening the floating-IP plane.) +build_ovs_brex enp1s0 "$C_PROV" "10.12.4.$OCTET" hdr "metal stack (enp7s0 -> br-metal -> br-metal.103 -> br-internal)" EID="$(ifid_of enp7s0)"; [ -n "$EID" ] || fail "enp7s0 not found" @@ -227,9 +211,9 @@ else note "br-metal exists -- SKIP create"; fi [ "$MODE" = apply ] && BMID="$(ifid_of br-metal)" || BMID="" if ! 
linked_to br-metal "$C_METAL"; then - release_self_indexed "10.12.12.$OCTET" "$(subid_of "$C_METAL")" || true - emit "br-metal(id=$BMID) -> STATIC 10.12.12.$OCTET on subnet $(subid_of "$C_METAL")" \ - interface link-subnet "$SID" "$BMID" mode=STATIC subnet="$(subid_of "$C_METAL")" ip_address="10.12.12.$OCTET" + release_self_indexed "10.12.8.$OCTET" "$(subid_of "$C_METAL")" || true + emit "br-metal(id=$BMID) -> STATIC 10.12.8.$OCTET on subnet $(subid_of "$C_METAL")" \ + interface link-subnet "$SID" "$BMID" mode=STATIC subnet="$(subid_of "$C_METAL")" ip_address="10.12.8.$OCTET" else note "br-metal already on $C_METAL -- SKIP"; fi # 3) br-metal.103 (VLAN, VID 103) on br-metal if [ -z "$(ifid_of br-metal.103)" ]; then @@ -244,12 +228,12 @@ else note "br-internal exists -- SKIP create"; fi [ "$MODE" = apply ] && BIID="$(ifid_of br-internal)" || BIID="" if ! linked_to br-internal "$C_INT"; then - emit "br-internal(id=$BIID) -> STATIC 10.12.16.$OCTET on subnet $(subid_of "$C_INT")" \ - interface link-subnet "$SID" "$BIID" mode=STATIC subnet="$(subid_of "$C_INT")" ip_address="10.12.16.$OCTET" + emit "br-internal(id=$BIID) -> STATIC 10.12.12.$OCTET on subnet $(subid_of "$C_INT")" \ + interface link-subnet "$SID" "$BIID" mode=STATIC subnet="$(subid_of "$C_INT")" ip_address="10.12.12.$OCTET" else note "br-internal already on $C_INT -- SKIP"; fi hdr "data / storage / replication (raw + static)" -carve_raw enp8s0 "$C_DATA" "10.12.20.$OCTET" +carve_raw enp8s0 "$C_DATA" "10.12.16.$OCTET" carve_raw enp9s0 "$C_STOR" "10.12.32.$OCTET" carve_raw enp10s0 "$C_REPL" "10.12.36.$OCTET" diff --git a/scripts/d057-bundle-check.py b/scripts/d057-bundle-check.py deleted file mode 100644 index 3d8637e..0000000 --- a/scripts/d057-bundle-check.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python3 -""" -d057-bundle-check.py -- focused, fail-closed QA for the D-057 provider-vip split. - -Asserts ONLY the D-057 end-state invariants on a Charmed-OpenStack bundle: - 1. 
exactly 11 API charms bind public -> provider-vip; none remain on provider-public - 2. every clustered VIP is a triple: provider-vip(10.12.8/22) admin(10.12.12/22) - internal(10.12.16/22), all sharing one last octet in 50-60 - 3. ovn-chassis bridge-interface-mappings has the 3 chassis MACs and NOT openstack0's - -This does NOT re-review the whole bundle. scripts/review-bundle.py predates D-052 -(it forbids the per-endpoint bindings D-052 added) and is not a current gate -- see -docs/design-decisions.md / the end-of-deployment review note. FAIL -> exit 1. -ASCII-only output. -""" -import sys, re, ipaddress -try: - import yaml -except ImportError: - sys.stderr.write("ERROR: PyYAML not installed (pip install pyyaml --break-system-packages)\n"); sys.exit(2) - -PVIP = ipaddress.ip_network("10.12.8.0/22") -ADMIN = ipaddress.ip_network("10.12.12.0/22") -INTERNAL = ipaddress.ip_network("10.12.16.0/22") -OCTET_LO, OCTET_HI = 50, 60 -EXPECT_PUBLIC_VIP = 11 -OPENSTACK0_MAC = "52:54:00:3d:fd:54" -EXPECT_CHASSIS_MACS = {"52:54:00:9d:63:77", "52:54:00:89:7f:ce", "52:54:00:99:fc:c2"} -MAC_RE = re.compile(r"[0-9a-f]{2}(?::[0-9a-f]{2}){5}") - -def main(): - path = sys.argv[1] if len(sys.argv) > 1 else "bundle.yaml" - try: - doc = yaml.safe_load(open(path, encoding="utf-8")) - except Exception as e: - sys.stderr.write("ERROR: cannot parse %s: %s\n" % (path, e)); return 2 - apps = (doc or {}).get("applications", {}) or {} - fails, oks = [], [] - - def pub(s): return ((s or {}).get("bindings", {}) or {}).get("public") - vip_old = sorted(n for n, s in apps.items() if pub(s) == "provider-public") - vip_new = sorted(n for n, s in apps.items() if pub(s) == "provider-vip") - if vip_old: - fails.append("public still on provider-public: %s" % ", ".join(vip_old)) - if len(vip_new) != EXPECT_PUBLIC_VIP: - fails.append("public->provider-vip count=%d (expect %d): %s" % (len(vip_new), EXPECT_PUBLIC_VIP, ", ".join(vip_new))) - else: - oks.append("%d charms bind public->provider-vip; none on 
provider-public" % len(vip_new)) - - vip_ok = 0 - for n, s in apps.items(): - vip = ((s or {}).get("options", {}) or {}).get("vip") - if not vip: - continue - parts = str(vip).split() - if len(parts) != 3: - fails.append("%s vip not a triple: %r" % (n, vip)); continue - prov, adm, intr = parts - try: - okp = ipaddress.ip_address(prov) in PVIP - oka = ipaddress.ip_address(adm) in ADMIN - oki = ipaddress.ip_address(intr) in INTERNAL - except ValueError as e: - fails.append("%s bad vip ip: %s" % (n, e)); continue - if not okp: fails.append("%s provider leg %s not in %s" % (n, prov, PVIP)); continue - if not oka: fails.append("%s admin leg %s not in %s" % (n, adm, ADMIN)); continue - if not oki: fails.append("%s internal leg %s not in %s" % (n, intr, INTERNAL)); continue - octs = {p.split(".")[-1] for p in parts} - if len(octs) != 1: - fails.append("%s vip octets differ: %r" % (n, vip)); continue - o = int(octs.pop()) - if not (OCTET_LO <= o <= OCTET_HI): - fails.append("%s vip octet .%d outside %d-%d" % (n, o, OCTET_LO, OCTET_HI)); continue - vip_ok += 1 - if vip_ok: - oks.append("%d clustered VIP(s) are provider-vip/admin/internal triples, octet 50-60" % vip_ok) - - for n, s in apps.items(): - if (s or {}).get("charm") != "ovn-chassis": - continue - bim = str(((s or {}).get("options", {}) or {}).get("bridge-interface-mappings", "")) - if not bim: - continue - macs = set(MAC_RE.findall(bim.lower())) - if OPENSTACK0_MAC in macs: - fails.append("%s still maps openstack0 MAC %s (should be trimmed)" % (n, OPENSTACK0_MAC)) - missing = EXPECT_CHASSIS_MACS - macs - if missing: - fails.append("%s missing chassis MAC(s): %s" % (n, ", ".join(sorted(missing)))) - if OPENSTACK0_MAC not in macs and not missing: - oks.append("%s bridge-interface-mappings: 3 chassis MACs present, openstack0 trimmed" % n) - - for o in oks: print(" [ok] %s" % o) - for f in fails: print(" [FAIL] %s" % f) - print("\n%s: D-057 bundle invariants (%s)" % ("PASS" if not fails else "FAIL", path)) - return 1 
if fails else 0 - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/lib-net.sh b/scripts/lib-net.sh index 6bb17c0..76b0c58 100644 --- a/scripts/lib-net.sh +++ b/scripts/lib-net.sh @@ -19,31 +19,29 @@ fi # --- The six MAAS spaces / planes (D-052 / D-053). --- -PLANE_CIDRS=( "10.12.4.0/22" "10.12.12.0/22" "10.12.16.0/22" "10.12.20.0/22" "10.12.32.0/22" "10.12.36.0/22" ) +PLANE_CIDRS=( "10.12.4.0/22" "10.12.8.0/22" "10.12.12.0/22" "10.12.16.0/22" "10.12.32.0/22" "10.12.36.0/22" ) declare -A PLANE_NAME=( ["10.12.4.0/22"]="provider-public" - ["10.12.12.0/22"]="metal-admin" - ["10.12.16.0/22"]="metal-internal" - ["10.12.20.0/22"]="data-tenant" + ["10.12.8.0/22"]="metal-admin" + ["10.12.12.0/22"]="metal-internal" + ["10.12.16.0/22"]="data-tenant" ["10.12.32.0/22"]="storage" ["10.12.36.0/22"]="replication" ) SPACES6=( provider-public metal-admin metal-internal data-tenant storage replication ) # Names that MUST be gone after the D-052 / D-053 cutover (deploy fails or mis-binds if any reappear). -STALE_SPACES=( provider metal data fabric-data lbaas ) +STALE_SPACES=( provider metal data fabric-data lbaas provider-vip ) # Gateways: only provider-public and metal-admin route; the other four are gw=none. -declare -A PLANE_GW=( ["10.12.4.0/22"]="10.12.4.1" ["10.12.12.0/22"]="10.12.12.1" ) +declare -A PLANE_GW=( ["10.12.4.0/22"]="10.12.4.1" ["10.12.8.0/22"]="10.12.8.1" ) # The four non-API, non-PXE planes whose host NICs MAAS must have provisioned. -DATA_PLANE_CIDRS=( "10.12.16.0/22" "10.12.20.0/22" "10.12.32.0/22" "10.12.36.0/22" ) +DATA_PLANE_CIDRS=( "10.12.12.0/22" "10.12.16.0/22" "10.12.32.0/22" "10.12.36.0/22" ) # metal-internal is a TAGGED VLAN bridged on the metal fabric; host links land on br-internal. 
-METAL_INTERNAL_CIDR="10.12.16.0/22" +METAL_INTERNAL_CIDR="10.12.12.0/22" METAL_INTERNAL_VID="103" -PROVIDER_VIP_CIDR="10.12.8.0/22" # D-057: public API VIP plane (tagged secondary on enp1s0) -PROVIDER_VIP_VID="104" # D-057: tagged VLAN for provider-vip (sibling of 103) METAL_INTERNAL_IFACE="br-internal" # Host identity (hostnames, octets, boot MACs, system_id resolution) now lives in @@ -53,9 +51,9 @@ # Triple HA VIPs (D-020 + D-052): each API charm carries provider/admin/internal columns, # matching last octet, in the .50-.60 band. 11 clustered API charms. -VIP_PREFIX_PROVIDER="10.12.8" # provider-vip leg (D-057 moved public VIPs off provider-public .4) -VIP_PREFIX_ADMIN="10.12.12" -VIP_PREFIX_INTERNAL="10.12.16" +VIP_PREFIX_PROVIDER="10.12.4" # provider-public leg (public API VIPs share the FIP plane; Pattern A) +VIP_PREFIX_ADMIN="10.12.8" +VIP_PREFIX_INTERNAL="10.12.12" VIP_OCTET_MIN=50 VIP_OCTET_MAX=60 VIP_COUNT_EXPECT=11 diff --git a/scripts/phase-00-maas-recidr.sh b/scripts/phase-00-maas-recidr.sh deleted file mode 100644 index f627006..0000000 --- a/scripts/phase-00-maas-recidr.sh +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env bash -# scripts/phase-00-maas-recidr.sh [--apply] -# -# Gated MAAS re-CIDR migration D-052/053 -> D-058 for the planes whose CIDR MOVES: -# metal-admin 10.12.8.0/22 -> 10.12.12.0/22 (untagged, metal fabric) -# metal-internal 10.12.12.0/22 -> 10.12.16.0/22 (VID 103, metal fabric) -# data-tenant 10.12.16.0/22 -> 10.12.20.0/22 (untagged, data fabric) -# (provider-vip 10.12.8.0/22 is NEW, not a move -- the standup creates it once .8 is freed.) -# -# REUSE-IN-PLACE: MAAS cannot change a subnet's CIDR, so each plane is migrated by -# deleting the old subnet and recreating it at the new CIDR on the SAME fabric + SAME -# VLAN. The VLAN (and its space assignment) persists across the subnet delete, so the -# new subnet inherits the correct space with no space/VLAN edits. Existing fabrics are -# kept (no orphaned fabrics). 
-# -# Default is DRY-RUN (audit): resolves everything live BY CIDR (PATTERN-1, no hardcoded -# ids), verifies each old subnet is on its expected space + VLAN, surfaces the metal/data -# fabric ids, lists reserved ranges + any live IP allocations, and prints the plan. Pass -# --apply to execute. COLLISION-SAFE: all old subnets are deleted BEFORE any new subnet is -# created (each new CIDR is the old CIDR of another plane, freed by the deletes). -# -# This script does ONLY the destructive subnet swap. Gateways, managed, dns, the reserved -# bands, and provider-vip are the standup's job -- run AFTER this: -# scripts/phase-00-maas-standup.sh --apply (build provider-vip + reserves + attrs) -# scripts/phase-00-maas-standup.sh (dry-run: expect all-SKIP, no drift) -# -# PRE-REQS: openstack model torn down + hosts released, so the subnets have no live links. -# If MAAS refuses a delete (interfaces still linked), the error is surfaced and we STOP -- -# clear the links (release/delete the machines) and re-run. We never force-delete. -# -# Exit: 0 ok (or nothing to migrate) | 1 fatal / unexpected state | 2 precondition -# CLI forms per Canonical MAAS how-to-manage-networks. ASCII + LF. - -set -euo pipefail -shopt -s inherit_errexit 2>/dev/null || true - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# shellcheck source=scripts/lib-net.sh -. "$SCRIPT_DIR/lib-net.sh" - -MAAS_PROFILE="${MAAS_PROFILE:-admin}" -MODE="dryrun"; [ "${1:-}" = "--apply" ] && MODE="apply" -FATAL=0 - -hdr() { echo; echo "=== $* ==="; } -note() { echo " - $*"; } -fail() { echo "FAIL: $*" >&2; FATAL=$((FATAL+1)); } -need_jq || exit 1 - -# read wrapper: valid JSON or "[]" so a stray MAAS error never crashes us under set -e. -maas_json() { local out; out="$(maas "$MAAS_PROFILE" "$@" 2>/dev/null || true)"; printf '%s' "$out" | jq empty 2>/dev/null && printf '%s' "$out" || printf '[]'; } - -emit() { # - local desc="$1"; shift - if [ "$MODE" = "apply" ]; then - echo " DO: $desc" - local out - if ! 
out="$(maas "$MAAS_PROFILE" "$@" 2>&1)"; then - fail "$desc" - echo " MAAS said: $(printf '%s' "$out" | grep -viE '^(Success|Machine-readable)' | head -3 | tr '\n' ' ')" >&2 - return 1 - fi - else - echo " WOULD: $desc" - echo " maas $MAAS_PROFILE $*" - fi -} - -sub_id() { maas_json subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.id|tostring)' | head -1; } -sub_vid() { maas_json subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.vlan.vid|tostring)' | head -1; } -sub_fabid() { maas_json subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.vlan.fabric_id|tostring)' | head -1; } -sub_space() { maas_json subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.space // "")' | head -1; } -vlanobj() { maas_json vlans read "$1" | jq -r --arg v "$2" '.[]|select((.vid|tostring)==$v)|(.id|tostring)' | head -1; } -ipr_ids_on(){ maas_json ipranges read | jq -r --arg s "$1" '.[]|select((.subnet.id|tostring)==$s)|(.id|tostring)'; } -allocs_on() { maas "$MAAS_PROFILE" subnet ip-addresses "$1" 2>/dev/null | jq -r 'if type=="array" then (.[]|.ip // .start_ip // empty) else empty end' 2>/dev/null || true; } - -# --- migration table: name|old_cidr|new_cidr|kind|vid (vid=0 for untagged) --- -MIG="$(cat < D-058 mode=$MODE" -note "reuse-in-place: new subnet created on each plane's EXISTING fabric + VLAN; spaces untouched." 
- -# ---------------------------------------------------------------- AUDIT (capture) -declare -A M_FAB M_VID M_OLDSUB M_NEW -ORDER=(); PENDING=0 -hdr "audit (read-only): resolve + verify each migrating plane by its OLD cidr" -while IFS='|' read -r name ocidr ncidr kind vid; do - [ -n "$name" ] || continue - osub="$(sub_id "$ocidr")" - if [ -z "$osub" ]; then note "$name: no subnet at old $ocidr -- already migrated or absent; SKIP"; continue; fi - curspace="$(sub_space "$ocidr")" - if [ "$curspace" != "$name" ]; then - note "$name: old $ocidr is now space '$curspace' (not '$name') -- already migrated or not this plane; SKIP"; continue; fi - want_vid=$([ "$kind" = tagged ] && echo "$vid" || echo 0) - gotvid="$(sub_vid "$ocidr")" - if [ "$gotvid" != "$want_vid" ]; then - fail "$name: subnet $ocidr on VID '$gotvid', expected $want_vid -- refusing"; continue; fi - fab="$(sub_fabid "$ocidr")" - ranges="$(ipr_ids_on "$osub" | tr '\n' ' ')" - allocs="$(allocs_on "$osub" | tr '\n' ' ')" - M_FAB["$name"]="$fab"; M_VID["$name"]="$want_vid"; M_OLDSUB["$name"]="$osub"; M_NEW["$name"]="$ncidr" - ORDER+=("$name"); PENDING=$((PENDING+1)) - note "$name: $ocidr (subnet $osub, fabric $fab, vid $want_vid) -> $ncidr on the SAME fabric/vid" - [ -n "${ranges// }" ] && note " reserved range ids to delete first: $ranges" - [ -n "${allocs// }" ] && note " NOTE live IP allocations present ($allocs) -- if a delete is refused, clear these (release/delete machines) and re-run" -done <<< "$MIG" - -hdr "fabric summary (eyeball before any mutation)" -note "metal fabric (metal-admin/metal-internal) = ${M_FAB[metal-admin]:-${M_FAB[metal-internal]:-?}}" -note "data fabric (data-tenant) = ${M_FAB[data-tenant]:-?}" -note "provider fabric (provider-vip target, handled by standup) = resolve via provider-public 10.12.4.0/22" - -[ "$FATAL" -eq 0 ] || { echo; echo "completed with $FATAL failure(s) -- fix the unexpected state above before proceeding"; exit 1; } -if [ "$PENDING" -eq 0 ]; then hdr "result"; 
note "nothing to migrate (no old-scheme subnets present)"; echo; echo "OK ($MODE)"; exit 0; fi - -# ------------------------------------------------------------------------- PLAN -if [ "$MODE" = dryrun ]; then - hdr "PLAN (dry-run -- nothing changed)" - echo " 1) delete (ranges then subnet), all $PENDING old subnets first:" - for n in "${ORDER[@]}"; do echo " - $n delete subnet ${M_OLDSUB[$n]} (was the old CIDR)"; done - echo " 2) create new subnets on the same fabric/VLAN (collision-free after the deletes):" - for n in "${ORDER[@]}"; do echo " - $n create ${M_NEW[$n]} on fabric ${M_FAB[$n]} vid ${M_VID[$n]}"; done - echo " 3) then: scripts/phase-00-maas-standup.sh --apply (provider-vip + gateways + dns + reserves)" - echo " scripts/phase-00-maas-standup.sh (verify: all-SKIP, no drift)" - echo - echo " re-run with --apply to execute." - exit 0 -fi - -# ----------------------------------------------------------------------- MUTATE -hdr "MUTATE 1/2: delete old subnets (ranges first), collision-safe" -for n in "${ORDER[@]}"; do - osub="${M_OLDSUB[$n]}" - for rid in $(ipr_ids_on "$osub"); do emit "delete iprange $rid (on $n old subnet $osub)" iprange delete "$rid" || true; done - emit "delete subnet id=$osub ($n old CIDR)" subnet delete "$osub" || true -done -[ "$FATAL" -eq 0 ] || { echo; echo "delete phase hit $FATAL failure(s) -- STOP (likely live interface links; clear them and re-run). 
No new subnets created."; exit 1; } - -hdr "MUTATE 2/2: create new subnets on the existing fabric/VLAN" -for n in "${ORDER[@]}"; do - fab="${M_FAB[$n]}"; vid="${M_VID[$n]}"; ncidr="${M_NEW[$n]}" - vobj="$(vlanobj "$fab" "$vid")" - [ -n "$vobj" ] || { fail "$n: cannot resolve VLAN obj for fabric $fab vid $vid -- the VLAN should persist after subnet delete; aborting before create"; continue; } - emit "create subnet $ncidr on fabric $fab vid $vid (vlan obj $vobj)" subnets create cidr="$ncidr" vlan="$vobj" -done - -[ "$FATAL" -eq 0 ] || { echo; echo "completed with $FATAL failure(s)"; exit 1; } -hdr "next" -echo " run: scripts/phase-00-maas-standup.sh --apply (provider-vip + gateways + dns + reserves)" -echo " then: scripts/phase-00-maas-standup.sh (verify: all-SKIP, no drift)" -echo; echo "OK ($MODE)" diff --git a/scripts/phase-00-maas-standup.sh b/scripts/phase-00-maas-standup.sh index 01db208..b5bf831 100644 --- a/scripts/phase-00-maas-standup.sh +++ b/scripts/phase-00-maas-standup.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # scripts/phase-00-maas-standup.sh [--apply] # -# MAAS topology stand-up for the v1 (VR0 / Baldurkeep) plane scheme (D-058). +# MAAS topology stand-up for the v1 (VR0 / Baldurkeep) plane scheme (D-052 / D-053). # Idempotently brings MAAS to the target fabric/VLAN/subnet/space layout so the # carve + bundle can resolve every plane. Useful BOTH for a fresh test cloud # (everything absent -> full create plan) and for an existing cloud (present-and- @@ -11,23 +11,24 @@ # no hardcoded MAAS ids) and prints each mutation it WOULD run, changing nothing. # Pass --apply to execute. Re-runnable; anything already correct is SKIPped. # -# SAFETY: this script NEVER deletes. A re-CIDR (a subnet present at a CIDR that -# D-058 reassigns to a different plane -- e.g. 
the live D-052/053 cloud where -# 10.12.8/22 is metal-admin but D-058 wants it for provider-vip) is DESTRUCTIVE -# (MAAS cannot change a subnet's CIDR in place) and is therefore OUT OF SCOPE: -# it is reported in the DRIFT section as MIGRATE-NEEDED and gated to a separate -# human teardown step. This script will refuse to create a target subnet whose -# CIDR is occupied by the wrong plane. +# SAFETY: this script NEVER deletes. The target is the D-052/D-053 plane scheme, +# which the live test cloud already matches -- so a dry-run reports "no drift" and +# --apply is a no-op here. The refuse-to-clobber guard remains for a FRESH cloud +# (Roosevelt): if a subnet is present at a target CIDR but bound to the WRONG plane +# or VID, that is a destructive re-CIDR (MAAS cannot change a CIDR in place) and is +# OUT OF SCOPE -- it is reported in the DRIFT section and gated to a human teardown +# step. This script will refuse to create a target subnet whose CIDR is occupied +# by the wrong plane. # -# SINGLE MAAS-address authority (D-058 consolidation): owns topology (fabric/VLAN/ -# subnet/space/gateway/managed/dns) AND every reserved range -- API-VIP bands, the -# Neutron FIP pool, and mgmt reserves. phase-00-maas-carve.sh is RETIRED: its FIP/ -# VIP/mgmt reserves are folded in here, and its gated stale-range delete is subsumed -# by the teardown + re-CIDR step. +# SINGLE MAAS-address authority: owns topology (fabric/VLAN/subnet/space/gateway/ +# managed/dns) AND every reserved range -- API-VIP bands, the Neutron FIP pool, and +# mgmt reserves. phase-00-maas-carve.sh is RETIRED: its FIP/VIP/mgmt reserves are +# folded in here. # # Order matters (MAAS semantics + fresh-fabric bootstrap): untagged base planes -# first (each owns a fabric), then their tagged siblings ride that fabric, so a -# fresh MAAS bootstraps provider-public -> provider-vip, metal-admin -> metal-internal. 
+# first (each owns a fabric), then their tagged sibling rides that fabric, so a +# fresh MAAS bootstraps metal-admin -> metal-internal (VID 103). provider-public +# carries the public API VIPs + the FIP pool on one untagged plane (Pattern A). # # Exit: 0 ok (no drift) | 1 fatal or unresolved drift | 2 precondition # CLI forms verified against Canonical MAAS how-to-manage-networks. @@ -82,16 +83,15 @@ vlanspace() { case "$1" in "<"*) return;; esac; maas_json vlans read "$1" | jq -r --arg v "$2" '.[]|select((.vid|tostring)==$v)|(.space // "")' | head -1; } vlan0obj() { vlanobj "$1" 0; } # the untagged (vid 0) default VLAN of a fabric -# --- target plane table (D-058): name|cidr|kind|vid|parent_cidr|gw|dnssrc|reserves +# --- target plane table (D-052/D-053): name|cidr|kind|vid|parent_cidr|gw|dnssrc|reserves # reserves = ";"-separated "lo:hi:label" entries (or "-"); label has no : ; | # kind=untagged owns a fabric; kind=tagged rides parent_cidr's fabric on . # "-" = none. dnssrc = a CIDR whose dns_servers to mirror, or "-". PLANES="$(cat < nothing to drift if [ "$curspace" != "$name" ]; then - note "DRIFT: $cidr is space '$curspace' but D-058 assigns it to '$name' -- MIGRATE (delete+recreate; gated, NOT done here)" + note "DRIFT: $cidr is space '$curspace' but the D-052/D-053 scheme assigns it to '$name' -- MIGRATE (delete+recreate; gated, NOT done here)" WRONG_CIDR["$cidr"]=1; DRIFT=$((DRIFT+1)); continue fi if [ "$kind" = tagged ]; then @@ -232,4 +232,4 @@ [ "$DRIFT" -eq 0 ] || echo " $DRIFT plane(s) need a gated re-CIDR/migration before they can be stood up (see DRIFT)." 
if [ "$FATAL" -ne 0 ]; then echo " completed with $FATAL failure(s)"; exit 1; fi if [ "$DRIFT" -ne 0 ]; then echo " OK ($MODE) -- but exit 1 due to unresolved drift"; exit 1; fi -echo " OK ($MODE) -- topology consistent with D-058" +echo " OK ($MODE) -- topology consistent with D-052/D-053" diff --git a/scripts/phase-00-teardown.sh b/scripts/phase-00-teardown.sh index eb8120e..e52b3fa 100644 --- a/scripts/phase-00-teardown.sh +++ b/scripts/phase-00-teardown.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash # scripts/phase-00-teardown.sh [--apply] [--no-prompt] # -# Gated teardown for the D-058 reconfigure. Destroys the `openstack` Juju model and +# Gated teardown for the Pattern A revert (D-060). Destroys the `openstack` Juju model and # deletes the orphaned `capi-mgmt` MAAS machine, so the hosts release to Ready for the -# re-CIDR/standup/rebuild. HARD-EXCLUDES the management substrate (juju, lxd, tailscale): +# Pattern A re-carve/standup-verify/rebuild. HARD-EXCLUDES the management substrate (juju, lxd, tailscale): # those system_ids are resolved and asserted OUT of every target set -- the script # refuses to run if a target ever resolves to a protected machine. # @@ -153,5 +153,5 @@ done for p in "${PROTECTED[@]}"; do note "PROTECTED $p -> $(status_of "$p") (unchanged)"; done -echo; echo "next: scripts/phase-00-maas-recidr.sh (audit) -- once openstack0-3 are Ready" +echo; echo "next (hosts Ready): 8_lbaas libvirt-net removal -> carve-host-interfaces.sh (Pattern A) -> phase-00-maas-standup.sh (verify D-052/D-053)" echo "OK (apply)" diff --git a/scripts/provider-bundle-check.py b/scripts/provider-bundle-check.py new file mode 100644 index 0000000..159b377 --- /dev/null +++ b/scripts/provider-bundle-check.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +""" +provider-bundle-check.py -- focused, fail-closed QA for the Pattern A provider revert (D-060). + +Asserts ONLY the post-revert (D-052/D-053 + Pattern A) provider invariants on a +Charmed-OpenStack bundle: + 1. 
exactly 11 API charms bind public -> provider-public; none remain on provider-vip
+  2. every clustered VIP is a triple: provider-public(10.12.4/22) admin(10.12.8/22)
+     internal(10.12.12/22), all sharing one last octet in 50-60
+  3. ovn-chassis bridge-interface-mappings carries ALL FOUR chassis MACs, INCLUDING
+     openstack0's -- the Pattern A revert re-adds the openstack0 MAC that D-057 trimmed
+     for the now-dead provider-vip plane.
+
+This does NOT re-review the whole bundle. It is the deploy-gate that REPLACES the retired
+scripts/d057-bundle-check.py (which asserted the now-superseded D-057 provider-vip end
+state). FAIL -> exit 1. ASCII-only output.
+"""
+import sys, re, ipaddress
+try:
+    import yaml
+except ImportError:
+    sys.stderr.write("ERROR: PyYAML not installed (pip install pyyaml --break-system-packages)\n"); sys.exit(2)
+
+# NOTE(review): the bundle.yaml header in this same change-set describes DUAL VIPs
+# (public 10.12.4.N / internal+admin 10.12.8.N) and places the OVN DATA space on
+# 10.12.12.0/22, while this checker demands a TRIPLE with INTERNAL on 10.12.12.0/22.
+# Confirm these networks against the D-052/D-053 plane table before relying on the gate.
+PROVIDER = ipaddress.ip_network("10.12.4.0/22")  # public API VIPs + FIPs share this plane (Pattern A)
+ADMIN = ipaddress.ip_network("10.12.8.0/22")
+INTERNAL = ipaddress.ip_network("10.12.12.0/22")
+OCTET_LO, OCTET_HI = 50, 60
+EXPECT_PUBLIC_VIP = 11
+EXPECT_CHASSIS_MACS = {
+    "52:54:00:3d:fd:54",  # openstack0 -- re-added by the Pattern A revert (D-057 had trimmed it)
+    "52:54:00:9d:63:77",  # openstack1
+    "52:54:00:89:7f:ce",  # openstack2
+    "52:54:00:99:fc:c2",  # openstack3
+}
+MAC_RE = re.compile(r"[0-9a-f]{2}(?::[0-9a-f]{2}){5}")
+
+def main():
+    """Run the three Pattern A provider checks against a bundle and print the findings.
+
+    The bundle path is taken from sys.argv[1] (default "bundle.yaml"). One "[ok]" or
+    "[FAIL]" line is printed per finding, followed by a PASS/FAIL summary line.
+
+    Returns:
+        0 if every invariant holds, 1 if any check failed, 2 if the bundle cannot be
+        read/parsed or its top level / 'applications' value is not a mapping.
+    """
+    path = sys.argv[1] if len(sys.argv) > 1 else "bundle.yaml"
+    try:
+        # Context manager: the handle is closed even when safe_load raises.
+        with open(path, encoding="utf-8") as fh:
+            doc = yaml.safe_load(fh)
+    except Exception as e:
+        sys.stderr.write("ERROR: cannot parse %s: %s\n" % (path, e)); return 2
+    # A non-mapping document would otherwise die with a traceback on .get()/.items().
+    if doc is not None and not isinstance(doc, dict):
+        sys.stderr.write("ERROR: %s: top level is not a mapping\n" % path); return 2
+    apps = (doc or {}).get("applications", {}) or {}
+    if not isinstance(apps, dict):
+        sys.stderr.write("ERROR: %s: 'applications' is not a mapping\n" % path); return 2
+    fails, oks = [], []
+
+    # --- check 1: public binding is provider-public on exactly EXPECT_PUBLIC_VIP apps ---
+    def pub(s): return ((s or {}).get("bindings", {}) or {}).get("public")
+    on_vip = sorted(n for n, s in apps.items() if pub(s) == "provider-vip")
+    on_public = sorted(n for n, s in apps.items() if pub(s) == "provider-public")
+    if on_vip:
+        fails.append("public still on provider-vip (must be reverted to provider-public): %s" % ", ".join(on_vip))
+    if len(on_public) != EXPECT_PUBLIC_VIP:
+        fails.append("public->provider-public count=%d (expect %d): %s" % (len(on_public), EXPECT_PUBLIC_VIP, ", ".join(on_public)))
+    elif not on_vip:
+        # Only claim "none on provider-vip" when that is actually true.
+        oks.append("%d charms bind public->provider-public; none on provider-vip" % len(on_public))
+
+    # --- check 2: every 'vip' option is a provider/admin/internal triple, same octet ---
+    vip_ok = 0
+    for n, s in apps.items():
+        vip = ((s or {}).get("options", {}) or {}).get("vip")
+        if not vip:
+            continue
+        parts = str(vip).split()
+        if len(parts) != 3:
+            fails.append("%s vip not a triple: %r" % (n, vip)); continue
+        prov, adm, intr = parts
+        try:
+            okp = ipaddress.ip_address(prov) in PROVIDER
+            oka = ipaddress.ip_address(adm) in ADMIN
+            oki = ipaddress.ip_address(intr) in INTERNAL
+        except ValueError as e:
+            fails.append("%s bad vip ip: %s" % (n, e)); continue
+        if not okp: fails.append("%s provider leg %s not in %s" % (n, prov, PROVIDER)); continue
+        if not oka: fails.append("%s admin leg %s not in %s" % (n, adm, ADMIN)); continue
+        if not oki: fails.append("%s internal leg %s not in %s" % (n, intr, INTERNAL)); continue
+        octs = {p.split(".")[-1] for p in parts}
+        if len(octs) != 1:
+            fails.append("%s vip octets differ: %r" % (n, vip)); continue
+        o = int(octs.pop())
+        if not (OCTET_LO <= o <= OCTET_HI):
+            fails.append("%s vip octet .%d outside %d-%d" % (n, o, OCTET_LO, OCTET_HI)); continue
+        vip_ok += 1
+    if vip_ok:
+        oks.append("%d clustered VIP(s) are provider-public/admin/internal triples, octet 50-60" % vip_ok)
+
+    # --- check 3: ovn-chassis bridge-interface-mappings carries all expected MACs ---
+    mac_checked = 0
+    for n, s in apps.items():
+        if (s or {}).get("charm") != "ovn-chassis":
+            continue
+        bim = str(((s or {}).get("options", {}) or {}).get("bridge-interface-mappings", ""))
+        if not bim:
+            # An ovn-chassis app without the option is skipped, not failed
+            # (presumably e.g. ovn-chassis-octavia -- confirm against the bundle).
+            continue
+        mac_checked += 1
+        macs = set(MAC_RE.findall(bim.lower()))
+        missing = EXPECT_CHASSIS_MACS - macs
+        if missing:
+            fails.append("%s missing chassis MAC(s): %s" % (n, ", ".join(sorted(missing))))
+        else:
+            oks.append("%s bridge-interface-mappings: all 4 chassis MACs present (incl openstack0)" % n)
+    if not mac_checked:
+        # Fail closed: with nothing to inspect, invariant 3 is unverified, not satisfied.
+        fails.append("no ovn-chassis application sets bridge-interface-mappings; chassis MACs unverified")
+
+    for o in oks: print("  [ok]   %s" % o)
+    for f in fails: print("  [FAIL] %s" % f)
+    print("\n%s: Pattern A / D-052-D-053 bundle invariants (%s)" % ("PASS" if not fails else "FAIL", path))
+    return 1 if fails else 0
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/provider-vip-standup.sh b/scripts/provider-vip-standup.sh
deleted file mode 100644
index a3f6d74..0000000
--- a/scripts/provider-vip-standup.sh
+++ /dev/null
@@ -1,172 +0,0 @@
-#!/usr/bin/env bash
-# scripts/provider-vip-standup.sh [--apply]
-#
-# D-057: stand up the provider-vip MAAS plane so the carve + bundle can resolve it.
-# Creates, idempotently and in this order (per MAAS semantics: a space attaches to a
-# VLAN, and a subnet inherits the space via its VLAN):
-#   1. space provider-vip
-#   2. VLAN vid=104 on the PROVIDER fabric (the fabric that owns 10.12.4.0/22),
-#      mtu mirrored from the metal-internal VLAN
-#   3. assign that VLAN -> space provider-vip
-#   4. subnet 10.12.8.0/22 on that VLAN; gateway_ip + managed + dns set after
-#   5. reserved VIP band 10.12.8.2-.100 (VIPs .50-.60 live in it)
-#
-# Default is DRY-RUN: resolves every id live by NAME/CIDR (PATTERN-1, no hardcoded
-# MAAS ids) and prints each mutation it WOULD run, changing nothing. Pass --apply to
-# execute. Idempotent: anything already present is SKIPped; re-runnable.
-#
-# NOTE: VIDs are PER-FABRIC in MAAS, so VID 104 existing on some OTHER fabric (e.g.
-# nothing here, but in general) is irrelevant -- only the provider fabric is checked.
-#
-# MAAS-only by design (portable to Roosevelt -- no virbr1/host assumptions). The
-# jumphost L3 gateway (virbr1.104 = 10.12.8.1) and the virbr1 vlan_filtering gate
-# are SEPARATE host steps, not part of this script.
-# -# Exit: 0 ok | 1 fatal -# -# CLI forms verified against Canonical MAAS docs (how-to-manage-networks): -# vlans create $FABRIC name= vid= mtu= ; vlan update $FABRIC $VID space= ; -# subnets create cidr= vlan= ; subnet update $CIDR key=value ; -# spaces create name= ; ipranges create type=reserved subnet= start_ip= end_ip= - -set -euo pipefail -shopt -s inherit_errexit 2>/dev/null || true - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# shellcheck source=scripts/lib-net.sh -. "$SCRIPT_DIR/lib-net.sh" - -MAAS_PROFILE="${MAAS_PROFILE:-admin}" -FATAL=0 -fail() { echo "FAIL: $*" >&2; FATAL=$((FATAL+1)); } -note() { echo "NOTE: $*"; } -hdr() { echo; echo "=== $* ==="; } - -MODE="dryrun"; [ "${1:-}" = "--apply" ] && MODE="apply" -need_jq || exit 1 - -# ---- plane definition (constants; lib-net carries the shared CIDR/VID) ------ -PROVIDER_CIDR="10.12.4.0/22" # fabric anchor: VID 104 lives on THIS fabric -PVIP_CIDR="$PROVIDER_VIP_CIDR" # 10.12.8.0/22 -PVIP_VID="$PROVIDER_VIP_VID" # 104 -PVIP_SPACE="provider-vip" -PVIP_GATEWAY="10.12.8.1" # D-057 routed plane; set "" to omit (default-route watch-item) -PVIP_RANGE_LO="10.12.8.2" -PVIP_RANGE_HI="10.12.8.100" - -# ---- live resolvers (read-only; re-queried each call -- the plane mutates) -- -maas_q() { maas "$MAAS_PROFILE" "$@"; } -prov_fabric() { maas_q subnets read | jq -r --arg c "$PROVIDER_CIDR" '.[]|select(.cidr==$c)|.vlan.fabric_id' | head -1; } -subid_of() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|.id' | head -1; } -subvid_of() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|.vlan.vid' | head -1; } -sub_field() { maas_q subnets read | jq -r --arg c "$1" --arg f "$2" '.[]|select(.cidr==$c)|(.[$f] // "")' | head -1; } -space_id() { maas_q spaces read | jq -r --arg n "$PVIP_SPACE" '.[]|select(.name==$n)|.id' | head -1; } -metal_mtu() { maas_q subnets read | jq -r --arg c "$METAL_INTERNAL_CIDR" '.[]|select(.cidr==$c)|.vlan.mtu' | head -1; } -metal_dns() { maas_q 
subnets read | jq -r --arg c "$METAL_INTERNAL_CIDR" '.[]|select(.cidr==$c)|(.dns_servers // []|join(","))' | head -1; } -# parent_mtu: VID 104 is a child of enp1s0 (the PROVIDER untagged plane), so its MTU must -# track the provider fabric -- NOT metal-internal (a child of a different fabric). A VLAN -# MTU exceeding its parent's would break the interface. -parent_mtu() { maas_q subnets read | jq -r --arg c "$PROVIDER_CIDR" '.[]|select(.cidr==$c)|.vlan.mtu' | head -1; } -# vlan obj id of a vid on a fabric (empty if absent) -vlanobj_on_fab() { maas_q vlans read "$1" | jq -r --arg v "$2" '.[]|select((.vid|tostring)==$v)|.id' | head -1; } -# current space NAME of a vid on a fabric -vlanspace_on_fab(){ maas_q vlans read "$1" | jq -r --arg v "$2" '.[]|select((.vid|tostring)==$v)|(.space // "")' | head -1; } - -# ---- mutation emitter (runs in apply; prints WOULD in dryrun) --------------- -emit() { - local desc="$1"; shift - if [ "$MODE" = "apply" ]; then - echo " DO: $desc" - local out - if ! 
out="$(maas "$MAAS_PROFILE" "$@" 2>&1)"; then - fail "$desc" - echo " MAAS said: $(printf '%s' "$out" | grep -viE '^(Success|Machine-readable)' | head -3 | tr '\n' ' ')" >&2 - return 1 - fi - else - echo " WOULD: $desc" - echo " maas $MAAS_PROFILE $*" - fi -} - -hdr "provider-vip stand-up mode=$MODE ($PVIP_SPACE / $PVIP_CIDR / VID $PVIP_VID)" - -# ---- 0) resolve + guard the provider fabric -------------------------------- -PROV_FAB="$(prov_fabric)" -[ -n "$PROV_FAB" ] || { fail "provider fabric not found (no MAAS subnet $PROVIDER_CIDR)"; exit 1; } -note "provider fabric_id = $PROV_FAB (VID $PVIP_VID will be created here)" - -# guard: if the target subnet already exists, it MUST be on VID 104 (else misconfigured) -if [ -n "$(subid_of "$PVIP_CIDR")" ]; then - GOTVID="$(subvid_of "$PVIP_CIDR")" - [ "$GOTVID" = "$PVIP_VID" ] || { fail "subnet $PVIP_CIDR exists on VID '$GOTVID', expected $PVIP_VID -- refusing to proceed"; exit 1; } -fi - -# ---- 1) space -------------------------------------------------------------- -hdr "space $PVIP_SPACE" -if [ -z "$(space_id)" ]; then - emit "create space $PVIP_SPACE" spaces create name="$PVIP_SPACE" -else note "space $PVIP_SPACE exists -- SKIP create"; fi - -# ---- 2) VLAN VID 104 on the provider fabric -------------------------------- -hdr "VLAN VID $PVIP_VID on fabric $PROV_FAB" -if [ -z "$(vlanobj_on_fab "$PROV_FAB" "$PVIP_VID")" ]; then - MTU="$(parent_mtu)"; { [ -n "$MTU" ] && [ "$MTU" != "null" ]; } || MTU="1500" - emit "create VLAN vid=$PVIP_VID name=$PVIP_SPACE mtu=$MTU on fabric $PROV_FAB" \ - vlans create "$PROV_FAB" name="$PVIP_SPACE" vid="$PVIP_VID" mtu="$MTU" -else note "VID $PVIP_VID already on fabric $PROV_FAB -- SKIP create"; fi - -# ---- 3) assign VLAN -> space (idempotent) ---------------------------------- -hdr "assign VID $PVIP_VID -> space $PVIP_SPACE" -CURSPACE="$(vlanspace_on_fab "$PROV_FAB" "$PVIP_VID")" -if [ "$CURSPACE" != "$PVIP_SPACE" ]; then - SID_SPACE="$(space_id)"; [ -n "$SID_SPACE" ] || SID_SPACE="" 
- emit "assign fabric $PROV_FAB vid $PVIP_VID -> space $PVIP_SPACE (id $SID_SPACE)" \ - vlan update "$PROV_FAB" "$PVIP_VID" space="$SID_SPACE" -else note "VID $PVIP_VID already in space $PVIP_SPACE -- SKIP"; fi - -# ---- 4) subnet + gateway/managed/dns --------------------------------------- -hdr "subnet $PVIP_CIDR on VID $PVIP_VID" -if [ -z "$(subid_of "$PVIP_CIDR")" ]; then - VOBJ="$(vlanobj_on_fab "$PROV_FAB" "$PVIP_VID")"; [ -n "$VOBJ" ] || VOBJ="" - emit "create subnet $PVIP_CIDR vlan=$VOBJ" subnets create cidr="$PVIP_CIDR" vlan="$VOBJ" -else note "subnet $PVIP_CIDR exists -- SKIP create"; fi -# gateway (routed plane); only if set and not already correct -if [ -n "$PVIP_GATEWAY" ]; then - CURGW="$(sub_field "$PVIP_CIDR" gateway_ip)" - if [ "$CURGW" != "$PVIP_GATEWAY" ]; then - emit "subnet $PVIP_CIDR -> gateway_ip=$PVIP_GATEWAY" subnet update "$PVIP_CIDR" gateway_ip="$PVIP_GATEWAY" - else note "gateway_ip already $PVIP_GATEWAY -- SKIP"; fi -fi -# managed -if [ "$(sub_field "$PVIP_CIDR" managed)" != "true" ]; then - emit "subnet $PVIP_CIDR -> managed=true" subnet update "$PVIP_CIDR" managed=true -else note "subnet $PVIP_CIDR already managed -- SKIP"; fi -# dns mirrored from metal-internal (only if metal has dns and ours differs) -DNS="$(metal_dns)" -if [ -n "$DNS" ] && [ "$DNS" != "null" ]; then - CURDNS="$(maas_q subnets read | jq -r --arg c "$PVIP_CIDR" '.[]|select(.cidr==$c)|(.dns_servers // []|join(","))' | head -1)" - if [ "$CURDNS" != "$DNS" ]; then - emit "subnet $PVIP_CIDR -> dns_servers=$DNS (mirrors metal-internal)" subnet update "$PVIP_CIDR" dns_servers="$DNS" - else note "dns_servers already $DNS -- SKIP"; fi -else note "metal-internal has no dns_servers -- leaving $PVIP_CIDR dns unset"; fi - -# ---- 5) reserved VIP band -------------------------------------------------- -hdr "reserved VIP band $PVIP_RANGE_LO-$PVIP_RANGE_HI" -if maas_q ipranges read | jq -e --arg lo "$PVIP_RANGE_LO" '.[]|select(.start_ip==$lo)' >/dev/null 2>&1; then - note "reserved 
range starting $PVIP_RANGE_LO exists -- SKIP" -else - SUB="$(subid_of "$PVIP_CIDR")"; [ -n "$SUB" ] || SUB="" - emit "create reserved range $PVIP_RANGE_LO-$PVIP_RANGE_HI on subnet $SUB" \ - ipranges create type=reserved subnet="$SUB" start_ip="$PVIP_RANGE_LO" end_ip="$PVIP_RANGE_HI" \ - comment="provider-vip API VIP band (D-057)" -fi - -# ---- result ---------------------------------------------------------------- -hdr "resulting provider-vip subnet (live)" -maas_q subnets read | jq -r --arg c "$PVIP_CIDR" '.[]|select(.cidr==$c) - |{cidr, vid:.vlan.vid, fabric:.vlan.fabric, space, managed, gateway_ip, dns_servers}' \ - || note "(dry-run: plane not created yet -- the WOULD lines above are the plan)" - -[ "$FATAL" = 0 ] || { echo; echo "completed with $FATAL failure(s)"; exit 1; } -echo; echo "OK ($MODE)"