diff --git a/bundle.yaml b/bundle.yaml index 93c676b..8e2aca9 100644 --- a/bundle.yaml +++ b/bundle.yaml @@ -4,30 +4,30 @@ # Generated: 2026-05-22 (rebuild revision 2026-06-01, bundle-cleanup change-set) # Replaces: bundle-pre-destroy.yaml (Bobcat 2023.2) # Charm channels: verified against Charmhub 2026-05-22 (see Caracal_Rebuild handoff D-002) -# Bindings: public:provider, else:metal for API charms; all-metal for backend charms. +# Bindings: public:provider-vip (D-057), else:metal-* for API charms; all-metal for backend charms. # Ceph data nets via public/cluster BINDINGS on ceph-mon/ceph-osd (these provision the # container/host NICs; ceph-*-network config would NOT). Ceph CLIENTS bind ceph->storage, # and each subordinate's storage/data binding is mirrored on its PRINCIPAL (subset rule). (C2) # Endpoints: IP-ONLY -- os-public-hostname dropped on all API charms; the dual VIPs ARE the -# catalog endpoints (public 10.12.4.N / internal+admin 10.12.8.N). Vault issues +# catalog endpoints (public 10.12.8.N / internal+admin 10.12.12.N; D-057). Vault issues # per-VIP IP-SAN certs. No control-plane DNS dependency. (B5) # HA chain: hacluster subordinates + dual VIPs + :ha relations ACTIVE for 11 API charms # (10 prior + ceph-radosgw, un-deferred). VIPs front-loaded into the MAAS-reserved -# /26: provider 10.12.4.2-.63, metal 10.12.8.2-.63 (supersedes .224-.254). (B1) +# /26: provider 10.12.4.2-.63, metal 10.12.12.2-.63 (supersedes .224-.254). (B1) # Vault: single unit, MYSQL storage backend (via vault-mysql-router). etcd + easyrsa # REMOVED -- the etcd backend was never used (live storage = mysql) and is moot at # 1 unit; HA backend (Raft vs etcd) is a Roosevelt rehearsal item. (C1; revises D-006) # Ceph networks: FULL separation via network-space BINDINGS -- ceph-mon/ceph-osd public->storage -# (10.12.16.0/22), ceph-osd cluster->replication (10.12.20.0/22). Bindings, NOT +# (10.12.32.0/22), ceph-osd cluster->replication (10.12.36.0/22).
Bindings, NOT # ceph-*-network config, so the LXD-contained mon actually gets a storage NIC. # Clients bind ceph->storage; container principals carry it too (subset rule). (C2) # Magnum: Layer A only -- CAPI driver graft is Layer B (runbooks/phase-06..08) # Octavia: lb-mgmt PKI options supplied via overlays/octavia-pki.yaml (gitignored). # Amphora-pipeline options baked (use-internal-endpoints etc.). (B4) -# OVN tunnels: geneve overlay on the DATA space (10.12.12.0/22) -- ovn-chassis + ovn-chassis-octavia +# OVN tunnels: geneve overlay on the DATA space (10.12.20.0/22) -- ovn-chassis + ovn-chassis-octavia # 'data' binding; their principals also carry data (nova-compute:neutron-plugin bare-metal, # octavia:ovsdb-cms provisions the container NIC) per the subset rule. Prereq: enp8s0 -# link-subnet to 10.12.12.4N (rebuild-prep, machines Ready). +# link-subnet to 10.12.20.4N (rebuild-prep, machines Ready). # Resources: omitted -- let charms use latest available resource revisions # ============================================================ @@ -72,7 +72,8 @@ # Network-space bindings (D-052): EXPLICIT per-application blocks, no anchors.
# "" -> metal-admin (operator/MAAS/monitoring; admin API; default) # internal/shared-db/amqp/certificates/cluster/identity/ovsdb -> metal-internal -# public -> provider-public (public API + floating IPs) +# public -> provider-vip (public API VIPs, tagged VID 104; D-057) +# floating IPs -> provider-public (FIP pool .5-.7 stays on the untagged provider plane) # ceph public -> storage ; ceph cluster -> replication # geneve overlay -> data-tenant (nova-compute:neutron-plugin, ovn-chassis:data, # ovn-chassis-octavia:data, octavia:ovsdb-cms) @@ -150,7 +151,7 @@ num_units: 1 # 3 on Roosevelt (D-009) to: [lxd:8] options: - vip: "10.12.4.50 10.12.8.50 10.12.12.50" # B1 front-loaded VIP; IS the catalog endpoint (B5, no os-public-hostname) + vip: "10.12.8.50 10.12.12.50 10.12.16.50" # B1 front-loaded VIP; IS the catalog endpoint (B5, no os-public-hostname) use-policyd-override: true # as-built reconcile 2026-06-09 (origin untraced -- Review-later) bindings: '': metal-admin @@ -165,7 +166,7 @@ internal: metal-internal keystone-fid-service-provider: metal-internal keystone-middleware: metal-internal - public: provider-public + public: provider-vip shared-db: metal-internal websso-trusted-dashboard: metal-internal constraints: arch=amd64 @@ -189,7 +190,7 @@ num_units: 1 to: [lxd:11] options: - vip: "10.12.4.53 10.12.8.53 10.12.12.53" # B1 + vip: "10.12.8.53 10.12.12.53 10.12.16.53" # B1 image-conversion: true # as-built; image conversion enabled (raw on Ceph-backed glance) bindings: '': metal-admin @@ -203,7 +204,7 @@ image-service: metal-internal internal: metal-internal object-store: metal-internal - public: provider-public + public: provider-vip shared-db: metal-internal storage-backend: metal-internal constraints: arch=amd64 @@ -245,7 +246,7 @@ options: console-access-protocol: novnc network-manager: Neutron - vip: "10.12.4.56 10.12.8.56 10.12.12.56" # B1 + vip: "10.12.8.56 10.12.12.56 10.12.16.56" # B1 bindings: '': metal-admin amqp: metal-internal @@ -264,7 +265,7 @@ 
neutron-api: metal-internal nova-cell-api: metal-internal placement: metal-internal - public: provider-public + public: provider-vip shared-db: metal-internal shared-db-cell: metal-internal constraints: arch=amd64 @@ -314,7 +315,7 @@ num_units: 1 to: [lxd:11] options: - vip: "10.12.4.59 10.12.8.59 10.12.12.59" # B1 + vip: "10.12.8.59 10.12.12.59 10.12.16.59" # B1 bindings: '': metal-admin amqp: metal-internal @@ -324,7 +325,7 @@ identity-service: metal-internal internal: metal-internal placement: metal-internal - public: provider-public + public: provider-vip shared-db: metal-internal constraints: arch=amd64 @@ -350,7 +351,7 @@ enable-ml2-port-security: true flat-network-providers: physnet1 neutron-security-groups: true - vip: "10.12.4.55 10.12.8.55 10.12.12.55" # B1 + vip: "10.12.8.55 10.12.12.55 10.12.16.55" # B1 bindings: '': metal-admin amqp: metal-internal @@ -362,7 +363,7 @@ neutron-api: metal-internal neutron-plugin-api: metal-internal neutron-plugin-api-subordinate: metal-internal - public: provider-public + public: provider-vip shared-db: metal-internal constraints: arch=amd64 @@ -409,7 +410,6 @@ ovn-bridge-mappings: physnet1:br-ex prefer-chassis-as-gw: true # B2 -- elects gateway chassis so tenant routers get external egress bridge-interface-mappings: >- - br-ex:52:54:00:3d:fd:54 br-ex:52:54:00:9d:63:77 br-ex:52:54:00:89:7f:ce br-ex:52:54:00:99:fc:c2 @@ -438,7 +438,7 @@ options: block-device: None glance-api-version: 2 - vip: "10.12.4.52 10.12.8.52 10.12.12.52" # B1 + vip: "10.12.8.52 10.12.12.52 10.12.16.52" # B1 bindings: '': metal-admin amqp: metal-internal @@ -452,7 +452,7 @@ identity-service: metal-internal image-service: metal-internal internal: metal-internal - public: provider-public + public: provider-vip shared-db: metal-internal storage-backend: metal-internal constraints: arch=amd64 # owns the relation -- but the binding still provisions the NIC. 
@@ -520,7 +520,7 @@ to: [lxd:8] options: source: *ceph-source - vip: "10.12.4.60 10.12.8.60 10.12.12.60" # B1 -- radosgw HA un-deferred for Roosevelt fidelity (decorative HA on testcloud) + vip: "10.12.8.60 10.12.12.60 10.12.16.60" # B1 -- radosgw HA un-deferred for Roosevelt fidelity (decorative HA on testcloud) bindings: '': metal-admin certificates: metal-internal @@ -531,7 +531,7 @@ internal: metal-internal mon: storage object-store: metal-internal - public: provider-public + public: provider-vip radosgw-user: metal-internal s3: metal-internal constraints: arch=amd64 @@ -547,7 +547,7 @@ to: [lxd:10] options: debug: "false" - vip: "10.12.4.58 10.12.8.58 10.12.12.58" # B1 -- browse HTTPS by IP (B5); ALLOWED_HOSTS must permit the VIP IP (verify at deploy) + vip: "10.12.8.58 10.12.12.58 10.12.16.58" # B1 -- browse HTTPS by IP (B5); ALLOWED_HOSTS must permit the VIP IP (verify at deploy) bindings: '': metal-admin application-dashboard: metal-internal @@ -557,7 +557,7 @@ dashboard-plugin: metal-internal ha: metal-internal identity-service: metal-internal - public: provider-public + public: provider-vip shared-db: metal-internal website: metal-internal websso-fid-service-provider: metal-internal @@ -595,7 +595,7 @@ # juju deploy ./bundle.yaml \ # --overlay overlays/vr0-dc0-testcloud.yaml \ # --overlay overlays/octavia-pki.yaml - vip: "10.12.4.57 10.12.8.57 10.12.12.57" # B1 + vip: "10.12.8.57 10.12.12.57 10.12.16.57" # B1 bindings: '': metal-admin amqp: metal-internal @@ -608,7 +608,7 @@ neutron-openvswitch: metal-internal ovsdb-cms: data-tenant ovsdb-subordinate: metal-internal - public: provider-public + public: provider-vip shared-db: metal-internal constraints: arch=amd64 # subset for the subordinate's data binding (subset rule). 
@@ -654,7 +654,7 @@ to: [lxd:11] options: openstack-origin: *openstack-origin - vip: "10.12.4.51 10.12.8.51 10.12.12.51" # B1 + vip: "10.12.8.51 10.12.12.51 10.12.16.51" # B1 bindings: '': metal-admin amqp: metal-internal @@ -663,7 +663,7 @@ ha: metal-internal identity-service: metal-internal internal: metal-internal - public: provider-public + public: provider-vip secrets: metal-internal shared-db: metal-internal constraints: arch=amd64 @@ -707,7 +707,7 @@ options: openstack-origin: *openstack-origin region: RegionOne - vip: "10.12.4.54 10.12.8.54 10.12.12.54" # B1 + vip: "10.12.8.54 10.12.12.54 10.12.16.54" # B1 bindings: '': metal-admin amqp: metal-internal @@ -716,7 +716,7 @@ ha: metal-internal identity-service: metal-internal internal: metal-internal - public: provider-public + public: provider-vip shared-db: metal-internal constraints: arch=amd64 diff --git a/docs/D-057-DECIDED-append.md b/docs/D-057-DECIDED-append.md new file mode 100644 index 0000000..258bfcf --- /dev/null +++ b/docs/D-057-DECIDED-append.md @@ -0,0 +1,37 @@ +> SUPERSEDED IN PART BY D-058 (full plane renumber, 2026-06-29): provider-vip is now +> **10.12.8.0/22** (gateway 10.12.8.1), not 10.12.24.0/22. metal-admin->.12, metal-internal->.16, +> data-tenant->.20, oob->.60. The .24/.8/.12 values below are the original D-057 instantiation, +> kept for history; the durable framework (separate tagged routed VIP plane) is unchanged. +> See D-058-renumber.md for the authoritative scheme. + +## D-057: Provider NIC L3-on-OVS-uplink -- split the public API VIP plane onto its own routed VLAN (2026-06-27) + +**Status:** DECIDED. Root cause CONFIRMED live (this session) and direction CONFIRMED by operator. Supersedes the standalone PROPOSED/OPEN note `D-057-provider-uplink-l3-separation.md` and resolves its one open question (the D-003B interaction) in favor of option (a)/(c). 
Implementation pending: MAAS plane add -> `carve-host-interfaces.sh` delta -> bundle delta -> teardown/redeploy -> D-011 re-validate. grep-before-assign confirmed D-057 free (max prior D-053; D-054/055/056 are DOCFIX). + +**Symptom:** every provider-ext floating IP (pool 10.12.5.0-10.12.7.254) unreachable cloud-wide; the .4.x public API VIPs answered. Blocked phase-06 Step 6.3 (SSH to capi-mgmt-v2 via FIP 10.12.7.107). + +**Root cause (CONFIRMED, not inferred -- measured on all three ovn-chassis hosts this session):** a two-consumer collision on the untagged provider NIC `enp1s0`. (1) ovn-chassis is configured `bridge-interface-mappings: br-ex:` and wants `enp1s0` as an OVS `br-ex` port. (2) ~11 `public: provider-public` API charms are LXD containers; Juju attaches a container to a subnet by bridging the host NIC, which created the Linux bridge `br-enp1s0` and captured `enp1s0`. A single untagged physical NIC cannot be both a Linux-bridge member and an OVS `br-ex` port: the Linux bridge won, `br-ex` was starved (no-carrier on all three), FIPs dark while the containerized .4.x VIPs kept answering. Evidence: `br-ex` operstate=down / carrier=Invalid-argument on openstack1/2/3; `enp1s0 master br-enp1s0` on all three; `br-enp1s0` ports = `enp1s0` + container `veth*` taps (1/3/5 taps); host static (.41/.42/.43) riding `br-enp1s0`; no public VIP on any host (all in containers); host default route via metal-admin `10.12.8.1`, provider `10.12.4.1` only on-link. The host has NO provider-routing role -- only its containers do. + +**Why option (b) was rejected:** "remove the host provider static" addresses consumer (1)'s L3 but NOT consumer (2). `br-enp1s0` exists for container attach; removing the host static cannot release `enp1s0`. Refuted by the measured `veth` taps. + +**Decision (a)/(c):** relocate the container `public` attach off the untagged uplink onto a tagged sub-interface, freeing untagged `enp1s0` for OVS `br-ex`. 
This is the Canonical shared-NIC pattern and is the exact mirror of the existing metal-internal tagged-secondary stack (`br-metal.103 -> br-internal`). Because a single CIDR cannot span two L2s, the public API VIPs re-IP onto a new subnet on the tagged VLAN -- which removes D-003B's same-L2 property and replaces it with L3 routing (see amendment below). + +**Target interface tree (per host; octet N = .40-.43 by HOST_OCTET):** +- `enp1s0` -- RAW, untagged on fabric 1_provider, **NO L3 / NO subnet link / NO bridge**. ovn-chassis MAC-enslaves it into OVS `br-ex`. (The old host static 10.12.4.N is removed FROM THIS UPLINK -- that removal is the fix.) +- `enp1s0.104` (VLAN, VID 104, on fabric 1_provider) -> `br-prov-api` (standard bridge) -> **STATIC 10.12.24.N**. Carries the new `provider-vip` space. Container `public` endpoints bind here. The host's provider-plane presence MOVES from `enp1s0` to `br-prov-api` (tagged, OVS-free, no `br-ex` competition -> zero D-057-class risk), mirroring metal-internal's `br-internal` host static -- the proven container-attach pattern on this cloud. (L3-less `br-prov-api` considered and rejected: unproven for Juju attach here; not worth gambling the redeploy.) + +**New plane (framework, not final subnetting):** `provider-vip` -- 10.12.24.0/22, VID 104, fabric 1_provider, **routed** (gateway 10.12.24.1), VIP reserve 10.12.24.2-.100 (VIPs .50-.60). The subnet/VID values are this deployment's instantiation; the DURABLE decision is the framework: the public API VIP plane is its own tagged, routed plane distinct from the provider ext_net/FIP plane. The DC-DC byte-aligned plan adopts the same split (its provider VLAN 240 -> 240 ext_net + 241 provider-vip; its already-present NN-11 role is the home). + +**provider-public after the split:** keeps only FIP/ext_net (10.12.5.0-10.12.7.254) + the OVN gateway SNAT + mgmt reserve; untagged enp1s0 -> OVS br-ex; NO container attach, NO host L3. 
Neutron stays flat physnet1 (no retype) -- the OVN/Neutron layer, confirmed correct this session, is untouched. + +**D-003B AMENDMENT:** D-003B deliberately co-located public API VIPs and FIPs on one provider L2 ("tenant->API by construction") -- a Bobcat-era convenience that this session proved unworkable on the NIC-limited host (the OVS-vs-container collision). It is hereby amended: the public API VIP plane is a separate, routed VLAN. tenant->API is preserved via L3 routing (tenant SNAT egress on provider 10.12.4.0/22 -> gateway -> provider-vip 10.12.24.0/22), and re-validated per D-011 #3. This is also better for the commercial hard-isolation goal (API VIPs out of the FIP broadcast domain; an L3 policy enforcement point) and matches the Roosevelt NIC-limited reality. + +**ROUTED-GATEWAY PREREQUISITE (gating, highest risk):** today only provider-public and metal-admin route (as-built: all other planes `gateway: none`). provider-vip must be routed: gateway 10.12.24.1 must exist and route to/from 10.12.4.0/22 on whatever owns 10.12.4.1 (jumphost/libvirt), with the return path 10.12.24.1 -> 10.12.4.0/22. Without it the redeploy passes D-011 #1/#2 and silently fails #3. Confirm/establish before redeploy -- do not discover at validation. + +**bridge-interface-mappings:** trim openstack0's provider MAC `52:54:00:3d:fd:54` (measured: the only one of the four with no ovn-chassis unit; nova-compute/ovn-chassis run on machines 9/10/11 = openstack1/2/3). Keep `9d:63:77`/`89:7f:ce`/`99:fc:c2`. Settles openstack0 as control+storage role (feeds Roosevelt node-role split). The mapping still targets the untagged `enp1s0` MAC (now free). + +**bundle changes (summary):** on the 11 `public:`-bound API charms, rebind `public: provider-public -> public: provider-vip`, and re-IP each `vip:` first token `10.12.4.X -> 10.12.24.X` (X=50-60; metal-admin .8.X and metal-internal .12.X tokens unchanged). Trim the openstack0 MAC. No Neutron/OVN config change. 
+ +**Roosevelt relevance:** the provider-vip tagged-routed-plane split joins metal-internal VID-103 as a per-host interface-tree region-invariant (maas-as-built reference). The DC-DC byte-aligned plan inherits the split. + +**Related:** root-causes phase-06 6.3; amends D-003B; extends D-052/D-053 plane model with the host-interface-layer rule the rebuild exposed; the committed `docs/maas-as-built-reference.md` carve table needs the follow-on correction (enp1s0 raw/L3-less + the enp1s0.104->br-prov-api stack). diff --git a/docs/D-057-REVIEW-ITEMS.md b/docs/D-057-REVIEW-ITEMS.md new file mode 100644 index 0000000..c11ecd8 --- /dev/null +++ b/docs/D-057-REVIEW-ITEMS.md @@ -0,0 +1,58 @@ +# D-057 -- review items for END OF DEPLOYMENT (do not action mid-deploy) + +These are real findings surfaced while building the D-057 provider-vip split. None +block the deploy; each is deferred to a post-D-011 reconciliation sweep so we don't +re-architect inside a step. Logged here so they are not lost. + +-------------------------------------------------------------------------------- +R1. provider-vip-maas-standup.md CREATE blocks are now redundant with the script. + scripts/provider-vip-standup.sh is the tested execution path for creating the + plane. The runbook's Phase-2 create blocks duplicate that logic (two sources of + truth -- this is how the MTU-source bug nearly drifted between them). KEEP the + runbook for its still-unique parts: Phase-1 audit, the virbr1 vlan_filtering gate, + and the deferred jumphost-gateway reads. RECONCILE: trim the create blocks to a + pointer at the script, or annotate them "superseded -- see script". (User: noted + for end-of-deployment review, not trimmed now.) + +-------------------------------------------------------------------------------- +R2. scripts/review-bundle.py is STALE (pre-D-052) and is NOT a current gate. 
+ Run against today's committed bundle it reports ~30 FAILs: its PHANTOM_BINDING_KEYS + check forbids exactly the per-endpoint bindings (shared-db/amqp/certificates/cluster/ + ha/internal) that D-052 deliberately added, and its VIP check expects DUAL VIPs at + octets .224-235 in the old 10.12.8 "metal" net -- the bundle has TRIPLE VIPs at + .50-.60 across provider-public/metal-admin/metal-internal. So review-bundle.py + predates D-052 entirely and does not describe the live bundle. + - The "verify_bundle.py / 8/8 harness" referenced in the redeploy notes is NOT at + this repo HEAD (d575a25) -- only review-bundle.py is. + - INTERIM GATE for the D-057 change: scripts/d057-bundle-check.py (focused, fail- + closed, proven FAIL-on-pre / PASS-on-post). It checks only the D-057 invariants. + - RECONCILE: either bring review-bundle.py forward to D-052/053/D-057 (rewrite + PHANTOM check, VIP check to triple + provider-vip 10.12.8 per D-058, octets 50-60), or + restore/commit the newer verify_bundle.py and retire review-bundle.py. + +-------------------------------------------------------------------------------- +R3. Committed bundle uses machines 8/9/10/11; the live cloud ran 0/1/2/3. + Repo-fidelity gap (the committed bundle is not the as-deployed bundle). Does NOT + affect D-057 (the carve is hostname-based; the MAC trim is by MAC). RECONCILE the + bundle machine block + `to:` placements to whatever the redeploy actually uses. + +-------------------------------------------------------------------------------- +R4. oob CIDR -- RESOLVED by D-058. oob adopts 10.12.60.0/22 (the design-docs value). + The live virsh power gateway 10.12.64.1 -> 10.12.60.1 (scripts/lib-hosts.sh + VIRSH_POWER_ADDRESS) is part of the D-058 foundation cascade -- see D-058-renumber.md. + +-------------------------------------------------------------------------------- +R5. gateway_ip default-route watch-item (post-deploy, not end-of-deploy). + provider-vip subnet carries gateway_ip=10.12.8.1 (post-D-058).
This mirrors provider-public's + existing .4.1 (which already coexists with metal-admin .8.1 without hijacking the + node default), so risk is low -- but VERIFY after redeploy that every API unit's + default route is still via metal-admin 10.12.12.1, not .8.1: + juju exec --all -- ip route show default + If any unit defaults via .8.1: blank the subnet gateway_ip (set PVIP_GATEWAY="" in + scripts/provider-vip-standup.sh and re-apply) or pin the node default-gateway subnet + to metal-admin. provider-vip reachability does not depend on its own gateway in v1. + +-------------------------------------------------------------------------------- +R6. ovn-chassis-octavia has no bridge-interface-mappings (expected -- it is the + octavia-side chassis). Left unchanged. Noted only so it is not mistaken for an + omission during review. diff --git a/docs/D-058-renumber.md b/docs/D-058-renumber.md new file mode 100644 index 0000000..590f13a --- /dev/null +++ b/docs/D-058-renumber.md @@ -0,0 +1,77 @@ +# D-058: full plane renumber -- clean fabric-grouped /22 scheme (2026-06-29) + +**Status:** DECIDED (operator). Supersedes the D-057 minimal-delta placement of +provider-vip at 10.12.24.0/22, and resolves R4 (oob). grep-before-assign: D-058 free +(max prior D-057; D-054/055/056 are DOCFIX). + +**What:** renumber the v1 plane scheme so CIDRs are contiguous /22 blocks grouped by +fabric and ordered to match the layer model, instead of the historical scatter. This +is a cloud-wide re-IP, intentionally larger than D-057, accepted by the operator for +Roosevelt addressing fidelity. It is executed as a teardown/redeploy (no in-place +re-CIDR), so there is no transient subnet overlap. 
+ +## The map (authoritative) + +| Plane | old | NEW | gateway (was -> now) | +|-----------------|---------------|---------------|-----------------------------| +| provider-public | 10.12.4.0/22 | 10.12.4.0/22 | 10.12.4.1 (unchanged) | +| provider-vip | 10.12.24.0/22 | 10.12.8.0/22 | 10.12.24.1 -> 10.12.8.1 | +| metal-admin | 10.12.8.0/22 | 10.12.12.0/22 | 10.12.8.1 -> 10.12.12.1 | +| metal-internal | 10.12.12.0/22 | 10.12.16.0/22 | none (L2 east-west) | +| data-tenant | 10.12.16.0/22 | 10.12.20.0/22 | none (isolated L2) | +| storage | 10.12.32.0/22 | 10.12.32.0/22 | none (unchanged) | +| replication | 10.12.36.0/22 | 10.12.36.0/22 | none (unchanged) | +| oob | 10.12.64.0/22 | 10.12.60.0/22 | 10.12.64.1 -> 10.12.60.1 | + +Rotate rule (collision-safe): 8->12, 12->16, 16->20, 24->8, 64->60; 4/32/36 fixed. +VLAN IDs unchanged (metal-internal VID 103, provider-vip VID 104). VIP triple becomes +provider-vip .8.5x / metal-admin .12.5x / metal-internal .16.5x (octets 50-60). +Host statics (.40-.43) follow each plane. metal-admin PXE-DHCP band -> 10.12.12.9-.11. + +## JUMPHOST ORDERING TRAP (must respect on the host) + +The jumphost owns three gateways that move. provider-vip's NEW gateway 10.12.8.1 +is metal-admin's OLD address. So on the jumphost, in this order: + 1. move virbr2 (metal-admin) 10.12.8.1 -> 10.12.12.1 + 2. move virbr7 (oob) 10.12.64.1 -> 10.12.60.1 + 3. THEN add virbr1.104 (provider-vip) = 10.12.8.1 <-- only after step 1 frees .8.1 +Adding virbr1.104=.8.1 while virbr2 still holds .8.1 is a same-subnet collision. In a +clean rebuild the bridges are reconfigured as a set, but the free-then-claim order +still applies. (Step 3 is the jumphost-provider-vip-gateway.md runbook.) + +## APEX / NetBox note (IaC discipline) + +NetBox is the apex; this renumber's authoritative home is NetBox. 
BUT the committed +netbox/ipv4-prefixes-import.py is itself stale (pre-D-052: only Metal/Provider/LBaaS-mgmt, +provider VLAN VID 240, API VIPs at .224-.254 -- none of the 6-plane D-052/053 model). It +must FIRST be brought current to D-052/053, THEN carry the D-058 scheme, before it can be +the source of truth. Until that reconciliation, scripts/lib-net.sh is the working contract +and already carries D-058. Do NOT hand-edit downstream MAAS for these values once NetBox +is current -- regenerate. + +## DONE in this pack (renumbered + re-validated: both suites ALL PASS, d057-check PASS) + +scripts/lib-net.sh (PLANE_CIDRS, PLANE_NAME, PLANE_GW, DATA_PLANE_CIDRS, +METAL_INTERNAL_CIDR, PROVIDER_VIP_CIDR, VIP_PREFIX_* triple), +scripts/carve-host-interfaces.sh, scripts/provider-vip-standup.sh, +scripts/d057-bundle-check.py, bundle.yaml (11 VIP triples), both test suites + +fixtures, provider-vip-maas-standup.md, jumphost-provider-vip-gateway.md, README. + +## COMMITTED-FOUNDATION CASCADE (still on the OLD scheme -- next sweep) + +Apply the same rotate (8->12, 12->16, 16->20, 24->8, 64->60; 4/32/36 fixed). 
These are +in the committed repo, not this pack, and several are prose runbooks -- sweep with care, +NetBox-anchored: + - netbox/ipv4-prefixes-import.py (APEX -- de-stale to D-052/053 first, then D-058) + - netbox/README.md + - scripts/phase-00-maas-carve.sh (METAL_CIDR default 10.12.8 -> 10.12.12; ranges) + - scripts/lib-hosts.sh (VIRSH_POWER_ADDRESS 10.12.64.1 -> 10.12.60.1) + - scripts/review-bundle.py (stale pre-D-052 already -- R2; fold in with that) + - runbooks/phase-00-teardown-maas-reset.md, phase-01-bundle-deploy.md, + phase-03-core-verify.md, phase-04-network-carve.md, phase-05-octavia-enablement.md, + phase-08-workload-cluster-acceptance.md, appendix-A-troubleshooting.md + - docs/maas-as-built-reference.md, docs/design-decisions.md (append D-058), + docs/v1-redeploy-changelog.md, docs/netbox-vip-queue.md + - tests/phase-00-carve/run-tests.sh, tests/phase-04/make_fixtures.py + - jumphost underlay: virbr2 -> 10.12.12.1, virbr7 -> 10.12.60.1 (see ordering trap) + - host-nginx :81 upstream: Horizon -> 10.12.8.58 diff --git a/runbooks/jumphost-provider-vip-gateway.md b/runbooks/jumphost-provider-vip-gateway.md new file mode 100644 index 0000000..d6e81b7 --- /dev/null +++ b/runbooks/jumphost-provider-vip-gateway.md @@ -0,0 +1,149 @@ +# jumphost provider-vip L3 gateway (virbr1.104 = 10.12.8.1) -- D-057 + +Provisions the L3 gateway that makes provider <-> provider-vip routing real on the +jumphost (vopenstack-jesse, 10.17.11.246). provider-vip (10.12.8.0/22, VID 104) +rides the SAME libvirt bridge as provider (virbr1), tagged; the jumphost already +routes between its directly-connected planes (ip_forward=1), so once virbr1.104 = +10.12.8.1 exists, tenant SNAT (on provider 10.12.5-7) reaches the API VIPs on +10.12.8.50-60 and back. + +WHY A RUNBOOK, NOT A SCRIPT: this is a one-time, consequential host change. The real +risk is how persistence interacts with libvirt (virbr1 is libvirt-managed, created at +libvirtd start) -- which a fixture test cannot exercise. 
It is also NOT portable to +Roosevelt (no virbr1 there; the provider-vip gateway is a physical router/SVI). So it +is gated and human-run, per the project's "human gates own consequential mutations". + +NOT required for: the MAAS plane stand-up, or the carve. The MAAS subnet records +gateway_ip=10.12.8.1 as metadata regardless. REQUIRED before: D-011 #3 (tenant -> +API reachability) and any provider<->provider-vip traffic test. + +================================================================================ +## PHASE 1 -- AUDIT (read-only). Run, paste back; this picks the persistence method. +================================================================================ + +--- BEGIN runbook block: gw-01-audit (RUN ON jumphost) --- +echo "=== G1: virbr1 must pass tagged frames (VID 104). MUST be 0 ===" +cat /sys/class/net/virbr1/bridge/vlan_filtering 2>/dev/null \ + || echo "WARN: virbr1 has no bridge/vlan_filtering node -- investigate before proceeding" + +echo "=== G2: ip_forward must be 1 ===" +cat /proc/sys/net/ipv4/ip_forward + +echo "=== G3: virbr1 detail (is it a bridge? up? who owns it?) ===" +ip -d link show virbr1 | sed -n '1,6p' +ip -br addr show virbr1 + +echo "=== G4: libvirt 1_provider net -- autostart + forward mode (NAT double-NAT note) ===" +sudo virsh net-info 1_provider 2>/dev/null +sudo virsh net-dumpxml 1_provider 2>/dev/null | sed -n '1,30p' + +echo "=== G5: is virbr1 touched by netplan? (decides systemd-vs-netplan persistence) ===" +ls -1 /etc/netplan/ 2>/dev/null +sudo grep -RnE 'virbr1|10\.12\.4\.1|10\.12\.8\.' /etc/netplan/ 2>/dev/null || echo "netplan: no virbr1 / .8 references" + +echo "=== G6: must NOT already exist ===" +ip -br addr show | grep -E 'virbr1\.104|10\.12\.8\.' || echo "clean: no virbr1.104 / .8 yet" +--- END runbook block: gw-01-audit --- + +STOP. Decision from the audit: +- G1 != 0 -> STOP. VID 104 will not traverse virbr1; the tagged-secondary approach + needs rework. This is the same hard gate as the MAAS stand-up. 
+- G5 shows virbr1 already managed in netplan -> prefer the NETPLAN persistence + variant (Phase 3B) to avoid two managers fighting. +- G5 shows virbr1 is purely libvirt (the expected case) -> use the SYSTEMD ONESHOT + variant (Phase 3A): it orders cleanly after libvirtd and won't race a netplan that + doesn't manage virbr1. +- G4 autostart != yes -> enable it (`sudo virsh net-autostart 1_provider`) so virbr1 + exists at boot before the gateway unit runs. + +================================================================================ +## PHASE 2 -- RUNTIME (reversible; proves it works before persisting) +================================================================================ + +GATE. Brings the gateway up immediately (lost on reboot -- Phase 3 persists it). +Fully reversible via the rollback block. +--- BEGIN runbook block: gw-02-runtime --- +sudo ip link add link virbr1 name virbr1.104 type vlan id 104 +sudo ip addr add 10.12.8.1/22 dev virbr1.104 +sudo ip link set virbr1.104 up +ip -br addr show virbr1.104 +ip route show 10.12.8.0/22 +--- END runbook block: gw-02-runtime --- + +ROLLBACK (if anything looks wrong): + sudo ip link del virbr1.104 + +TEST (after the MAAS plane exists and a host carries a .8 static, e.g. post-carve): + ping -c2 10.12.8.1 # the gateway itself + ping -c2 10.12.8.40 # a host's br-prov-api static (if carved) + # from a provider-plane host, confirm .8 is reachable via the jumphost route + +NOTE (libvirt NAT, cosmetic): 1_provider is forward mode=nat, so .4<->.8 traffic may +be masqueraded to the jumphost's address. It still works statefully (the API does not +care about source IP). If you later want symmetric, un-NATed provider<->provider-vip +routing, add an iptables RETURN rule ahead of the libvirt masquerade for +10.12.4.0/22 <-> 10.12.8.0/22 -- optional, not needed for v1. 
+ +================================================================================ +## PHASE 3 -- PERSISTENCE (pick ONE per the Phase-1 decision) +================================================================================ + +### 3A -- systemd oneshot (RECOMMENDED for libvirt-managed virbr1) +Orders after libvirtd; idempotent (deletes any stale virbr1.104 first). +--- BEGIN runbook block: gw-03a-systemd --- +sudo tee /etc/systemd/system/provider-vip-gw.service >/dev/null <<'UNIT' +[Unit] +Description=provider-vip L3 gateway (virbr1.104 = 10.12.8.1) -- D-057 +After=libvirtd.service network-online.target +Wants=network-online.target +Requires=libvirtd.service +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStartPre=-/sbin/ip link del virbr1.104 +ExecStart=/sbin/ip link add link virbr1 name virbr1.104 type vlan id 104 +ExecStart=/sbin/ip addr add 10.12.8.1/22 dev virbr1.104 +ExecStart=/sbin/ip link set virbr1.104 up +ExecStop=/sbin/ip link del virbr1.104 +[Install] +WantedBy=multi-user.target +UNIT +sudo systemctl daemon-reload +sudo systemctl enable --now provider-vip-gw.service +systemctl --no-pager status provider-vip-gw.service | sed -n '1,6p' +ip -br addr show virbr1.104 +--- END runbook block: gw-03a-systemd --- + +Persistence test (the real proof): `sudo reboot`, then after it returns +`ip -br addr show virbr1.104` must show 10.12.8.1/22 UP. (libvirt 1_provider must be +autostart -- see G4 -- so virbr1 exists when the unit runs.) + +ROLLBACK 3A: + sudo systemctl disable --now provider-vip-gw.service + sudo rm -f /etc/systemd/system/provider-vip-gw.service && sudo systemctl daemon-reload + sudo ip link del virbr1.104 2>/dev/null || true + +### 3B -- netplan (ONLY if G5 showed virbr1 already managed by netplan) +Add a vlans stanza. Risk: if virbr1 is NOT up when netplan runs at boot, the vlan +fails -- which is exactly why 3A is preferred for a libvirt bridge. Use only if your +jumphost already manages virbr1 via netplan. 
+ # in the relevant /etc/netplan/*.yaml, under network:: + # vlans: + # virbr1.104: + # id: 104 + # link: virbr1 + # addresses: [10.12.8.1/22] + # then: sudo netplan try (auto-reverts in 120s if unreachable), then sudo netplan apply +ROLLBACK 3B: remove the stanza; sudo netplan apply. + +================================================================================ +## PHASE 4 -- VERIFY +================================================================================ +--- BEGIN runbook block: gw-04-verify --- +ip -br addr show virbr1.104 # 10.12.8.1/22, UP +ip route show 10.12.8.0/22 # directly-connected via virbr1.104 +cat /proc/sys/net/ipv4/ip_forward # 1 +--- END runbook block: gw-04-verify --- + +DONE when virbr1.104 = 10.12.8.1/22 is UP, survives a reboot (3A), and a provider-plane +host can reach 10.12.8.x through the jumphost. diff --git a/runbooks/provider-vip-maas-standup.md b/runbooks/provider-vip-maas-standup.md new file mode 100644 index 0000000..a02d626 --- /dev/null +++ b/runbooks/provider-vip-maas-standup.md @@ -0,0 +1,192 @@ +# provider-vip MAAS stand-up (D-057) + +> NOTE (end-of-deployment review -- R1, see D-057-REVIEW-ITEMS.md): the PHASE 2 +> create blocks below are now SUPERSEDED by the tested, idempotent, dry-run-default +> `scripts/provider-vip-standup.sh`, which is the preferred execution path. They are +> retained here (not trimmed) for reference and as a manual fallback. PHASE 1 (audit), +> the virbr1 vlan_filtering gate, and the deferred jumphost-gateway reads remain +> uniquely useful. Reconcile (trim-to-pointer or annotate) in the post-D-011 sweep. + +Builds the provider-vip plane in MAAS so the carve and bundle have something to +resolve against. New plane: space `provider-vip`, VLAN VID 104 on the **provider +fabric** (the fabric that owns 10.12.4.0/22), subnet 10.12.8.0/22, reserved VIP +band .8.2-.100. + +RUN ON: the jumphost (`ssh jessea123@10.17.11.246`), MAAS admin profile already +logged in (`maas admin ...`). 
All values are resolved live by name/CIDR -- no +hardcoded MAAS ids (PATTERN-1). + +PRINCIPLE: PHASE 1 is read-only -- run it and report the output back BEFORE any +create. The audit is designed to surface anything that would change the carve or +bundle (VID 104 already taken, unexpected provider fabric, vlan_filtering=1 on +virbr1, metal-internal mtu/dns differing from assumptions). Do not run PHASE 2 +until PHASE 1 is reviewed. + +SCOPE NOTE: this stands up the MAAS side only. The jumphost L3 gateway +(virbr1.104 = 10.12.8.1) that makes .4<->.8 routing real is a separate host +step (libvirt/netplan, persistence-method TBD from a live read) -- deferred to +its own block, and NOT required for the MAAS plane to be created or for the carve +to resolve. It is required before D-011 #3 (tenant -> API reachability). + +================================================================================ +## PHASE 1 -- AUDIT (read-only; run, then PASTE OUTPUT BACK) +================================================================================ + +--- BEGIN runbook block: pvip-01-audit (RUN ON jumphost) --- +echo "=== A1: provider fabric (owns 10.12.4.0/22) -- VID 104 must live HERE ===" +maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22") + |{cidr, vid:.vlan.vid, fabric:.vlan.fabric, fabric_id:.vlan.fabric_id, + space, gateway_ip, dns_servers}' + +echo "=== A2: metal-internal VID 103 -- the TEMPLATE to mirror (mtu/managed/dns/dhcp) ===" +maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.16.0/22") + |{cidr, vid:.vlan.vid, fabric:.vlan.fabric, vlan_mtu:.vlan.mtu, + vlan_dhcp_on:.vlan.dhcp_on, space, managed, gateway_ip, dns_servers, + allow_dns, allow_proxy, rdns_mode}' + +echo "=== A3: VID 104 collision check -- expect EMPTY on every fabric ===" +maas admin subnets read | jq -r '[.[].vlan|{vid,fabric,id}]|unique_by(.id)[]|select(.vid==104)' +PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') +echo " provider 
fabric_id = $PROV_FAB ; VLANs already on it:" +maas admin vlans read "$PROV_FAB" | jq -r '.[]|{vid,name,id,space}' + +echo "=== A4: provider-vip must NOT already exist -- expect EMPTY for both ===" +maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.cidr' +maas admin spaces read | jq -r '.[]|select(.name=="provider-vip")|.name' + +echo "=== A5: provider-public reserved ranges -- mirror this pattern for .8 ===" +maas admin ipranges read | jq -r '.[]|select(.start_ip|startswith("10.12.4.")) + |{type, start_ip, end_ip, comment}' + +echo "=== A6: GATE -- virbr1 must pass tagged frames (VID 104). MUST print 0 ===" +cat /sys/class/net/virbr1/bridge/vlan_filtering 2>/dev/null \ + || echo "WARN: virbr1 has no bridge/vlan_filtering node (not a bridge?) -- investigate" + +echo "=== A7: jumphost must not already carry .8 -- expect 'clean' ===" +ip -br addr show | grep -E 'virbr1\.104|10\.12\.8\.' || echo "clean: no .8 on jumphost yet" +--- END runbook block: pvip-01-audit --- + +STOP. Report A1-A7. Expected / change-triggers: +- A1: provider fabric_id is the home for VID 104. Note its value; C2/C3 use it. +- A2: gives dns_servers + managed for provider-vip to mirror. Its mtu is reference-only: + C2 takes the VLAN mtu from the PROVIDER untagged VLAN (VID 104's parent). If dns_servers differ from a prior + assumption, C4b uses the value read here -- not a guessed 10.12.12.1. +- A3: must be EMPTY. If VID 104 is already in use, STOP -- pick another VID and + update lib-net.sh PROVIDER_VIP_VID + the carve assert + this runbook in lockstep. +- A4: both EMPTY. If either exists, a prior partial run happened -- reconcile, do + not blind-create. +- A6: MUST be 0. If 1, STOP -- VID 104 will not traverse virbr1 and the whole + tagged-secondary approach needs rework (per-port VLAN membership, or a different + bridge). This is the make-or-break gate; flag it loudly. 
+ +================================================================================ +## PHASE 2 -- CREATE (only after PHASE 1 reviewed; run ONE block at a time) +================================================================================ + +Re-resolve the helpers at the top of every shell you run these in (they are not +persisted between SSH sessions): + + PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') + MTU_PROV=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.mtu') # VID 104 parent = provider fabric + DNS103=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.16.0/22")|.dns_servers|join(",")') + +GATE C1 -- create the space. +--- BEGIN runbook block: pvip-02-space --- +maas admin spaces create name=provider-vip +maas admin spaces read | jq -r '.[]|select(.name=="provider-vip")|{name,id}' +--- END runbook block: pvip-02-space --- + +GATE C2 -- create VLAN 104 on the PROVIDER fabric, mtu mirroring the PROVIDER untagged VLAN +(VID 104 is a child of enp1s0 on the provider fabric -- its MTU must track that parent, +NOT metal-internal which lives on a different fabric). +(Confirm flags first if unsure: `maas admin vlans create --help`.) +--- BEGIN runbook block: pvip-03-vlan --- +PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') +MTU_PROV=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.mtu') # provider parent, not metal-internal +maas admin vlans create "$PROV_FAB" name=provider-vip vid=104 mtu="$MTU_PROV" +maas admin vlans read "$PROV_FAB" | jq -r '.[]|select(.vid==104)|{vid,name,id,fabric_id,mtu,space}' +--- END runbook block: pvip-03-vlan --- + +GATE C3 -- assign the VID-104 VLAN to the provider-vip space. +(If `space=` is rejected, retry with `space=provider-vip`.) 
+--- BEGIN runbook block: pvip-04-assign-space --- +PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') +SPACE_ID=$(maas admin spaces read | jq -r '.[]|select(.name=="provider-vip")|.id') +maas admin vlan update "$PROV_FAB" 104 space="$SPACE_ID" +maas admin vlans read "$PROV_FAB" | jq -r '.[]|select(.vid==104)|{vid,id,space}' +--- END runbook block: pvip-04-assign-space --- + +GATE C4 -- create the subnet on the VID-104 VLAN, then set gateway/dns/managed. +Split into create (minimal) + update (confirmed `subnet update` form) to avoid +guessing which flags `subnets create` accepts. +--- BEGIN runbook block: pvip-05-subnet --- +PROV_FAB=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.4.0/22")|.vlan.fabric_id') +VID104_VLANID=$(maas admin vlans read "$PROV_FAB" | jq -r '.[]|select(.vid==104)|.id') +DNS103=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.16.0/22")|.dns_servers|join(",")') + +# 5a: create (minimal -- cidr + vlan) +maas admin subnets create cidr=10.12.8.0/22 vlan="$VID104_VLANID" + +# 5b: routed-plane gateway (D-057) + managed; dns mirrors VID 103 if set +maas admin subnet update 10.12.8.0/22 gateway_ip=10.12.8.1 managed=true +[ -n "$DNS103" ] && maas admin subnet update 10.12.8.0/22 dns_servers="$DNS103" + +maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.8.0/22") + |{cidr, vid:.vlan.vid, fabric:.vlan.fabric, space, managed, gateway_ip, dns_servers}' +--- END runbook block: pvip-05-subnet --- + +GATE C5 -- reserved VIP band .8.2-.100 (VIPs .50-.60 live in it; mirrors .4.2-.100). 
+--- BEGIN runbook block: pvip-06-range --- +SUB24=$(maas admin subnets read | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.id') +maas admin ipranges create type=reserved subnet="$SUB24" \ + start_ip=10.12.8.2 end_ip=10.12.8.100 comment="provider-vip API VIP band (D-057)" +maas admin ipranges read | jq -r '.[]|select(.start_ip|startswith("10.12.8.")) + |{type, start_ip, end_ip, comment}' +--- END runbook block: pvip-06-range --- + +================================================================================ +## PHASE 3 -- VERIFY (read-only) -- proves the carve will resolve + Juju sees it +================================================================================ + +--- BEGIN runbook block: pvip-07-verify --- +echo "=== carve resolvers, simulated against live MAAS (must match the script) ===" +SUB=$(maas admin subnets read) +echo "subid_of 10.12.8.0/22 = $(echo "$SUB" | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.id') (expect non-empty)" +echo "vlanid_of 10.12.8.0/22 = $(echo "$SUB" | jq -r '.[]|select(.cidr=="10.12.8.0/22")|(.vlan.id // .vlan)') (the VLAN obj id)" +echo "vlanvid_of 10.12.8.0/22 = $(echo "$SUB" | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.vlan.vid') (MUST be 104)" +echo "space = $(echo "$SUB" | jq -r '.[]|select(.cidr=="10.12.8.0/22")|.space') (MUST be provider-vip)" + +echo "=== Juju visibility (real consumption is at redeploy; this pre-validates) ===" +juju reload-spaces +juju spaces | grep -E 'provider-vip|provider-public' || echo "WARN: provider-vip not visible to Juju" +--- END runbook block: pvip-07-verify --- + +PASS CRITERIA: +- vlanvid_of == 104, space == provider-vip, subid_of non-empty. +- `juju spaces` lists provider-vip. +At this point the carve's PHASE-1 asserts (`no MAAS subnet for 10.12.8.0/22`, +`provider-vip ... expected 104`) will pass, and the files are safe to drop in. 
+ +================================================================================ +## DEFERRED (separate step; needs a live read first) -- jumphost L3 gateway +================================================================================ + +virbr1.104 = 10.12.8.1 on the jumphost makes .4<->.8 routing real (ip_forward +is already on). NOT needed for the MAAS plane or the carve; needed before D-011 +#3. Before writing it I want a live read of how the jumphost defines virbr1 so +the persistence method is correct (libvirt network XML vs netplan vs a systemd +unit -- virbr1 is libvirt-managed, so a naive netplan vlan-on-virbr1 may race +libvirt at boot): + + ip -d link show virbr1 + virsh net-dumpxml 1_provider 2>/dev/null | sed -n '1,40p' + ls /etc/netplan/ ; sudo grep -RnE 'virbr1|10\.12\.4\.1' /etc/netplan/ 2>/dev/null + +Report that and I will write the gated gateway block (with a clean rollback). + +POST-DEPLOY WATCH-ITEM (gateway_ip safeguard): after redeploy, confirm every API +container's DEFAULT route is still via metal-admin 10.12.12.1, NOT 10.12.8.1: + juju exec --all -- ip route show default +If any unit defaults via .8.1, drop the subnet gateway_ip (set to "") or pin the +node default gateway to the metal-admin subnet; provider-vip reachability does +not depend on its own gateway in v1. diff --git a/scripts/carve-host-interfaces.sh b/scripts/carve-host-interfaces.sh index 4d3907e..b5bcba3 100644 --- a/scripts/carve-host-interfaces.sh +++ b/scripts/carve-host-interfaces.sh @@ -7,13 +7,17 @@ # nothing). Pass --apply to execute. 
# # Target tree (octet N = .40-.43 by host index; see lib-hosts.sh HOST_OCTET): -# enp1s0 raw + STATIC 10.12.4.N (provider-public; ovn-chassis builds br-ex -# OVS at deploy and enslaves enp1s0 by MAC -- -# MAAS must leave enp1s0 RAW) -# enp7s0 --> br-metal (standard bridge) + STATIC 10.12.8.N (metal-admin) +# enp1s0 raw, NO L3, UNTAGGED 1_provider (D-057) -- ovn-chassis MAC-enslaves it +# into OVS br-ex at deploy; the uplink carries NO +# host static (a static here forced a Linux bridge +# that starved br-ex). MAAS must leave enp1s0 RAW. +# enp1s0.104 --> br-prov-api (standard bridge) + STATIC 10.12.8.N (provider-vip; D-057) +# tagged secondary; public API VIP plane + container +# 'public' attach. Mirrors the metal-internal stack. +# enp7s0 --> br-metal (standard bridge) + STATIC 10.12.12.N (metal-admin) # br-metal.103 (vlan, VID 103) -# --> br-internal (standard bridge) + STATIC 10.12.12.N (metal-internal) -# enp8s0 raw + STATIC 10.12.16.N (data-tenant) +# --> br-internal (standard bridge) + STATIC 10.12.16.N (metal-internal) +# enp8s0 raw + STATIC 10.12.20.N (data-tenant) # enp9s0 raw + STATIC 10.12.32.N (storage; Juju auto-bridges at deploy) # enp10s0 raw + STATIC 10.12.36.N (replication; Juju auto-bridges at deploy) # enp11s0 idle (ex-lbaas; no link) @@ -67,17 +71,20 @@ vlanvid_of(){ printf '%s' "$SUBNETS_JSON" | jq -r --arg c "$1" '.[]|select(.cidr==$c)|(.vlan.vid // empty)' | head -1; } # plane CIDRs (verified set; sourced order from lib-net PLANE_CIDRS) -C_PROV="10.12.4.0/22"; C_METAL="10.12.8.0/22"; C_INT="$METAL_INTERNAL_CIDR" # 10.12.12.0/22 -C_DATA="10.12.16.0/22"; C_STOR="10.12.32.0/22"; C_REPL="10.12.36.0/22" +C_PROV="10.12.4.0/22"; C_METAL="10.12.12.0/22"; C_INT="$METAL_INTERNAL_CIDR" # 10.12.16.0/22 +C_PVIP="$PROVIDER_VIP_CIDR" # 10.12.8.0/22 (D-057 provider-vip; tagged VID 104) +C_DATA="10.12.20.0/22"; C_STOR="10.12.32.0/22"; C_REPL="10.12.36.0/22" -# assert all six planes resolve, and the internal plane is really VID 103 -for c in "$C_PROV" 
"$C_METAL" "$C_INT" "$C_DATA" "$C_STOR" "$C_REPL"; do +# assert all planes resolve, and the tagged planes are really VID 103 / 104 +for c in "$C_PROV" "$C_METAL" "$C_INT" "$C_PVIP" "$C_DATA" "$C_STOR" "$C_REPL"; do [ -n "$(subid_of "$c")" ] || { fail "no MAAS subnet for $c"; } [ -n "$(vlanid_of "$c")" ] || { fail "no VLAN for $c"; } done [ "$FATAL" = 0 ] || exit 1 gotvid="$(vlanvid_of "$C_INT")" [ "$gotvid" = "$METAL_INTERNAL_VID" ] || { fail "metal-internal $C_INT is VID '$gotvid', expected $METAL_INTERNAL_VID"; exit 1; } +gotpvipvid="$(vlanvid_of "$C_PVIP")" +[ "$gotpvipvid" = "$PROVIDER_VIP_VID" ] || { fail "provider-vip $C_PVIP is VID '$gotpvipvid', expected $PROVIDER_VIP_VID"; exit 1; } # interface id by name (live) ifid_of() { maas_q interfaces read "$SID" | jq -r --arg n "$1" '.[]|select(.name==$n)|.id' | head -1; } @@ -107,7 +114,7 @@ # release_self_indexed : release a DISCOVERED (alloc_type 6) record on # so a STATIC can take it. is THIS host's architecturally-indexed metal -# address (10.12.8. from HOST_OCTET), so a discovered observation on it is this +# address (10.12.12. from HOST_OCTET), so a discovered observation on it is this # host's own commissioning ghost. 
SAFETY: refuse if ANY source positively identifies a # DIFFERENT owner -- the StaticIPAddress node_summary.system_id (when present) must equal # this host's SID, and the discoveries-table MAC (when present) must equal this host's @@ -141,6 +148,7 @@ printf " provider %s sub=%s vlan=%s\n" "$C_PROV" "$(subid_of "$C_PROV")" "$(vlanid_of "$C_PROV")" printf " metal %s sub=%s vlan=%s\n" "$C_METAL" "$(subid_of "$C_METAL")" "$(vlanid_of "$C_METAL")" printf " internal %s sub=%s vlan=%s (vid %s)\n" "$C_INT" "$(subid_of "$C_INT")" "$(vlanid_of "$C_INT")" "$gotvid" +printf " prov-vip %s sub=%s vlan=%s (vid %s)\n" "$C_PVIP" "$(subid_of "$C_PVIP")" "$(vlanid_of "$C_PVIP")" "$gotpvipvid" printf " data %s sub=%s vlan=%s\n" "$C_DATA" "$(subid_of "$C_DATA")" "$(vlanid_of "$C_DATA")" printf " storage %s sub=%s vlan=%s\n" "$C_STOR" "$(subid_of "$C_STOR")" "$(vlanid_of "$C_STOR")" printf " replicat %s sub=%s vlan=%s\n" "$C_REPL" "$(subid_of "$C_REPL")" "$(vlanid_of "$C_REPL")" @@ -157,8 +165,53 @@ emit "$nic(id=$id) -> STATIC $ip on subnet $sub" interface link-subnet "$SID" "$id" mode=STATIC subnet="$sub" ip_address="$ip" } -hdr "provider plane (enp1s0 raw + static)" -carve_raw enp1s0 "$C_PROV" "10.12.4.$OCTET" +# D-057 helper: make a NIC raw + L3-LESS on its plane's UNTAGGED vlan (no subnet link), +# so ovn-chassis can MAC-enslave it into OVS br-ex at deploy. Idempotent. 
+ensure_raw_unlinked() { + local nic="$1" cidr="$2" id provvlan curvlan lid + id="$(ifid_of "$nic")"; [ -n "$id" ] || { fail "$nic not found on $HN"; return 1; } + provvlan="$(vlanid_of "$cidr")" + # unlink any subnet link(s) first so the uplink carries no L3 + for lid in $(maas_q interfaces read "$SID" | jq -r --arg n "$nic" '.[]|select(.name==$n)|.links[]?|select(.subnet!=null)|.id'); do + emit "unlink $nic(id=$id) link id=$lid (L3-less for OVS br-ex)" interface unlink-subnet "$SID" "$id" id="$lid" + done + # ensure it sits on the UNTAGGED vlan of $cidr -- only if not already there (idempotent) + curvlan="$(maas_q interfaces read "$SID" | jq -r --arg n "$nic" '.[]|select(.name==$n)|(.vlan.id // .vlan // empty)' | head -1)" + if [ "$curvlan" != "$provvlan" ]; then + emit "$nic(id=$id) -> VLAN $provvlan ($cidr untagged, no L3)" interface update "$SID" "$id" vlan="$provvlan" + else + note "$nic already on VLAN $provvlan -- SKIP vlan set" + fi +} + +hdr "provider plane (enp1s0 RAW + L3-LESS -- OVS br-ex uplink; D-057)" +# D-057: the OVS provider uplink must carry NO L3. ovn-chassis MAC-enslaves enp1s0 into +# br-ex at deploy; the old host static 10.12.4.N is REMOVED here (it forced a Linux bridge +# br-enp1s0 that captured enp1s0 and starved br-ex). Leave enp1s0 raw + unlinked. +ensure_raw_unlinked enp1s0 "$C_PROV" + +hdr "provider-vip plane (enp1s0.$PROVIDER_VIP_VID -> br-prov-api -> static; D-057)" +# Tagged secondary on the provider NIC, mirroring metal-internal (br-metal.103 -> br-internal). +# The bundle binds the API charms' 'public' endpoint to the provider-vip space, so containers +# attach HERE (not the untagged uplink). The host provider-plane static MOVES here from enp1s0. 
+PEID="$(ifid_of enp1s0)"; [ -n "$PEID" ] || fail "enp1s0 not found" +# 1) enp1s0.104 (VLAN, VID 104) on enp1s0 +if [ -z "$(ifid_of "enp1s0.$PROVIDER_VIP_VID")" ]; then + emit "create enp1s0.$PROVIDER_VIP_VID (VID $PROVIDER_VIP_VID, vlan obj $(vlanid_of "$C_PVIP")) parent=enp1s0(id=$PEID)" \ + interfaces create-vlan "$SID" vlan="$(vlanid_of "$C_PVIP")" parent="$PEID" +else note "enp1s0.$PROVIDER_VIP_VID exists -- SKIP create"; fi +[ "$MODE" = apply ] && PVID="$(ifid_of "enp1s0.$PROVIDER_VIP_VID")" || PVID="" +# 2) br-prov-api (standard) on enp1s0.104 +if [ -z "$(ifid_of br-prov-api)" ]; then + emit "create br-prov-api (standard) parent=enp1s0.$PROVIDER_VIP_VID(id=$PVID)" \ + interfaces create-bridge "$SID" name=br-prov-api bridge_type=standard parent="$PVID" +else note "br-prov-api exists -- SKIP create"; fi +[ "$MODE" = apply ] && BPID="$(ifid_of br-prov-api)" || BPID="" +# 3) STATIC on br-prov-api (host provider-plane presence; OVS-free so no br-ex conflict) +if ! linked_to br-prov-api "$C_PVIP"; then + emit "br-prov-api(id=$BPID) -> STATIC 10.12.8.$OCTET on subnet $(subid_of "$C_PVIP")" \ + interface link-subnet "$SID" "$BPID" mode=STATIC subnet="$(subid_of "$C_PVIP")" ip_address="10.12.8.$OCTET" +else note "br-prov-api already on $C_PVIP -- SKIP"; fi hdr "metal stack (enp7s0 -> br-metal -> br-metal.103 -> br-internal)" EID="$(ifid_of enp7s0)"; [ -n "$EID" ] || fail "enp7s0 not found" @@ -174,9 +227,9 @@ else note "br-metal exists -- SKIP create"; fi [ "$MODE" = apply ] && BMID="$(ifid_of br-metal)" || BMID="" if ! 
linked_to br-metal "$C_METAL"; then - release_self_indexed "10.12.8.$OCTET" "$(subid_of "$C_METAL")" || true - emit "br-metal(id=$BMID) -> STATIC 10.12.8.$OCTET on subnet $(subid_of "$C_METAL")" \ - interface link-subnet "$SID" "$BMID" mode=STATIC subnet="$(subid_of "$C_METAL")" ip_address="10.12.8.$OCTET" + release_self_indexed "10.12.12.$OCTET" "$(subid_of "$C_METAL")" || true + emit "br-metal(id=$BMID) -> STATIC 10.12.12.$OCTET on subnet $(subid_of "$C_METAL")" \ + interface link-subnet "$SID" "$BMID" mode=STATIC subnet="$(subid_of "$C_METAL")" ip_address="10.12.12.$OCTET" else note "br-metal already on $C_METAL -- SKIP"; fi # 3) br-metal.103 (VLAN, VID 103) on br-metal if [ -z "$(ifid_of br-metal.103)" ]; then @@ -191,12 +244,12 @@ else note "br-internal exists -- SKIP create"; fi [ "$MODE" = apply ] && BIID="$(ifid_of br-internal)" || BIID="" if ! linked_to br-internal "$C_INT"; then - emit "br-internal(id=$BIID) -> STATIC 10.12.12.$OCTET on subnet $(subid_of "$C_INT")" \ - interface link-subnet "$SID" "$BIID" mode=STATIC subnet="$(subid_of "$C_INT")" ip_address="10.12.12.$OCTET" + emit "br-internal(id=$BIID) -> STATIC 10.12.16.$OCTET on subnet $(subid_of "$C_INT")" \ + interface link-subnet "$SID" "$BIID" mode=STATIC subnet="$(subid_of "$C_INT")" ip_address="10.12.16.$OCTET" else note "br-internal already on $C_INT -- SKIP"; fi hdr "data / storage / replication (raw + static)" -carve_raw enp8s0 "$C_DATA" "10.12.16.$OCTET" +carve_raw enp8s0 "$C_DATA" "10.12.20.$OCTET" carve_raw enp9s0 "$C_STOR" "10.12.32.$OCTET" carve_raw enp10s0 "$C_REPL" "10.12.36.$OCTET" diff --git a/scripts/d057-bundle-check.py b/scripts/d057-bundle-check.py new file mode 100644 index 0000000..3d8637e --- /dev/null +++ b/scripts/d057-bundle-check.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +""" +d057-bundle-check.py -- focused, fail-closed QA for the D-057 provider-vip split. + +Asserts ONLY the D-057 end-state invariants on a Charmed-OpenStack bundle: + 1. 
exactly 11 API charms bind public -> provider-vip; none remain on provider-public + 2. every clustered VIP is a triple: provider-vip(10.12.8/22) admin(10.12.12/22) + internal(10.12.16/22), all sharing one last octet in 50-60 + 3. ovn-chassis bridge-interface-mappings has the 3 chassis MACs and NOT openstack0's + +This does NOT re-review the whole bundle. scripts/review-bundle.py predates D-052 +(it forbids the per-endpoint bindings D-052 added) and is not a current gate -- see +docs/design-decisions.md / the end-of-deployment review note. FAIL -> exit 1. +ASCII-only output. +""" +import sys, re, ipaddress +try: + import yaml +except ImportError: + sys.stderr.write("ERROR: PyYAML not installed (pip install pyyaml --break-system-packages)\n"); sys.exit(2) + +PVIP = ipaddress.ip_network("10.12.8.0/22") +ADMIN = ipaddress.ip_network("10.12.12.0/22") +INTERNAL = ipaddress.ip_network("10.12.16.0/22") +OCTET_LO, OCTET_HI = 50, 60 +EXPECT_PUBLIC_VIP = 11 +OPENSTACK0_MAC = "52:54:00:3d:fd:54" +EXPECT_CHASSIS_MACS = {"52:54:00:9d:63:77", "52:54:00:89:7f:ce", "52:54:00:99:fc:c2"} +MAC_RE = re.compile(r"[0-9a-f]{2}(?::[0-9a-f]{2}){5}") + +def main(): + path = sys.argv[1] if len(sys.argv) > 1 else "bundle.yaml" + try: + doc = yaml.safe_load(open(path, encoding="utf-8")) + except Exception as e: + sys.stderr.write("ERROR: cannot parse %s: %s\n" % (path, e)); return 2 + apps = (doc or {}).get("applications", {}) or {} + fails, oks = [], [] + + def pub(s): return ((s or {}).get("bindings", {}) or {}).get("public") + vip_old = sorted(n for n, s in apps.items() if pub(s) == "provider-public") + vip_new = sorted(n for n, s in apps.items() if pub(s) == "provider-vip") + if vip_old: + fails.append("public still on provider-public: %s" % ", ".join(vip_old)) + if len(vip_new) != EXPECT_PUBLIC_VIP: + fails.append("public->provider-vip count=%d (expect %d): %s" % (len(vip_new), EXPECT_PUBLIC_VIP, ", ".join(vip_new))) + else: + oks.append("%d charms bind public->provider-vip; none on 
provider-public" % len(vip_new)) + + vip_ok = 0 + for n, s in apps.items(): + vip = ((s or {}).get("options", {}) or {}).get("vip") + if not vip: + continue + parts = str(vip).split() + if len(parts) != 3: + fails.append("%s vip not a triple: %r" % (n, vip)); continue + prov, adm, intr = parts + try: + okp = ipaddress.ip_address(prov) in PVIP + oka = ipaddress.ip_address(adm) in ADMIN + oki = ipaddress.ip_address(intr) in INTERNAL + except ValueError as e: + fails.append("%s bad vip ip: %s" % (n, e)); continue + if not okp: fails.append("%s provider leg %s not in %s" % (n, prov, PVIP)); continue + if not oka: fails.append("%s admin leg %s not in %s" % (n, adm, ADMIN)); continue + if not oki: fails.append("%s internal leg %s not in %s" % (n, intr, INTERNAL)); continue + octs = {p.split(".")[-1] for p in parts} + if len(octs) != 1: + fails.append("%s vip octets differ: %r" % (n, vip)); continue + o = int(octs.pop()) + if not (OCTET_LO <= o <= OCTET_HI): + fails.append("%s vip octet .%d outside %d-%d" % (n, o, OCTET_LO, OCTET_HI)); continue + vip_ok += 1 + if vip_ok: + oks.append("%d clustered VIP(s) are provider-vip/admin/internal triples, octet 50-60" % vip_ok) + + for n, s in apps.items(): + if (s or {}).get("charm") != "ovn-chassis": + continue + bim = str(((s or {}).get("options", {}) or {}).get("bridge-interface-mappings", "")) + if not bim: + continue + macs = set(MAC_RE.findall(bim.lower())) + if OPENSTACK0_MAC in macs: + fails.append("%s still maps openstack0 MAC %s (should be trimmed)" % (n, OPENSTACK0_MAC)) + missing = EXPECT_CHASSIS_MACS - macs + if missing: + fails.append("%s missing chassis MAC(s): %s" % (n, ", ".join(sorted(missing)))) + if OPENSTACK0_MAC not in macs and not missing: + oks.append("%s bridge-interface-mappings: 3 chassis MACs present, openstack0 trimmed" % n) + + for o in oks: print(" [ok] %s" % o) + for f in fails: print(" [FAIL] %s" % f) + print("\n%s: D-057 bundle invariants (%s)" % ("PASS" if not fails else "FAIL", path)) + return 1 
if fails else 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/lib-net.sh b/scripts/lib-net.sh index 885fefb..6bb17c0 100644 --- a/scripts/lib-net.sh +++ b/scripts/lib-net.sh @@ -19,12 +19,12 @@ fi # --- The six MAAS spaces / planes (D-052 / D-053). --- -PLANE_CIDRS=( "10.12.4.0/22" "10.12.8.0/22" "10.12.12.0/22" "10.12.16.0/22" "10.12.32.0/22" "10.12.36.0/22" ) +PLANE_CIDRS=( "10.12.4.0/22" "10.12.12.0/22" "10.12.16.0/22" "10.12.20.0/22" "10.12.32.0/22" "10.12.36.0/22" ) declare -A PLANE_NAME=( ["10.12.4.0/22"]="provider-public" - ["10.12.8.0/22"]="metal-admin" - ["10.12.12.0/22"]="metal-internal" - ["10.12.16.0/22"]="data-tenant" + ["10.12.12.0/22"]="metal-admin" + ["10.12.16.0/22"]="metal-internal" + ["10.12.20.0/22"]="data-tenant" ["10.12.32.0/22"]="storage" ["10.12.36.0/22"]="replication" ) @@ -34,14 +34,16 @@ STALE_SPACES=( provider metal data fabric-data lbaas ) # Gateways: only provider-public and metal-admin route; the other four are gw=none. -declare -A PLANE_GW=( ["10.12.4.0/22"]="10.12.4.1" ["10.12.8.0/22"]="10.12.8.1" ) +declare -A PLANE_GW=( ["10.12.4.0/22"]="10.12.4.1" ["10.12.12.0/22"]="10.12.12.1" ) # The four non-API, non-PXE planes whose host NICs MAAS must have provisioned. -DATA_PLANE_CIDRS=( "10.12.12.0/22" "10.12.16.0/22" "10.12.32.0/22" "10.12.36.0/22" ) +DATA_PLANE_CIDRS=( "10.12.16.0/22" "10.12.20.0/22" "10.12.32.0/22" "10.12.36.0/22" ) # metal-internal is a TAGGED VLAN bridged on the metal fabric; host links land on br-internal. 
-METAL_INTERNAL_CIDR="10.12.12.0/22" +METAL_INTERNAL_CIDR="10.12.16.0/22" METAL_INTERNAL_VID="103" +PROVIDER_VIP_CIDR="10.12.8.0/22" # D-057: public API VIP plane (tagged secondary on enp1s0) +PROVIDER_VIP_VID="104" # D-057: tagged VLAN for provider-vip (sibling of 103) METAL_INTERNAL_IFACE="br-internal" # Host identity (hostnames, octets, boot MACs, system_id resolution) now lives in @@ -51,9 +53,9 @@ # Triple HA VIPs (D-020 + D-052): each API charm carries provider/admin/internal columns, # matching last octet, in the .50-.60 band. 11 clustered API charms. -VIP_PREFIX_PROVIDER="10.12.4" -VIP_PREFIX_ADMIN="10.12.8" -VIP_PREFIX_INTERNAL="10.12.12" +VIP_PREFIX_PROVIDER="10.12.8" # provider-vip leg (D-057 moved public VIPs off provider-public .4) +VIP_PREFIX_ADMIN="10.12.12" +VIP_PREFIX_INTERNAL="10.12.16" VIP_OCTET_MIN=50 VIP_OCTET_MAX=60 VIP_COUNT_EXPECT=11 diff --git a/scripts/provider-vip-standup.sh b/scripts/provider-vip-standup.sh new file mode 100644 index 0000000..a3f6d74 --- /dev/null +++ b/scripts/provider-vip-standup.sh @@ -0,0 +1,172 @@ +#!/usr/bin/env bash +# scripts/provider-vip-standup.sh [--apply] +# +# D-057: stand up the provider-vip MAAS plane so the carve + bundle can resolve it. +# Creates, idempotently and in this order (per MAAS semantics: a space attaches to a +# VLAN, and a subnet inherits the space via its VLAN): +# 1. space provider-vip +# 2. VLAN vid=104 on the PROVIDER fabric (the fabric that owns 10.12.4.0/22), +# mtu mirrored from the PROVIDER untagged VLAN (VID 104's parent; default 1500) +# 3. assign that VLAN -> space provider-vip +# 4. subnet 10.12.8.0/22 on that VLAN; gateway_ip + managed + dns set after +# 5. reserved VIP band 10.12.8.2-.100 (VIPs .50-.60 live in it) +# +# Default is DRY-RUN: resolves every id live by NAME/CIDR (PATTERN-1, no hardcoded +# MAAS ids) and prints each mutation it WOULD run, changing nothing. Pass --apply to +# execute. Idempotent: anything already present is SKIPped; re-runnable. 
+#
+# NOTE: VIDs are PER-FABRIC in MAAS, so VID 104 existing on some OTHER fabric (e.g.
+# nothing here, but in general) is irrelevant -- only the provider fabric is checked.
+#
+# MAAS-only by design (portable to Roosevelt -- no virbr1/host assumptions). The
+# jumphost L3 gateway (virbr1.104 = 10.12.8.1) and the virbr1 vlan_filtering gate
+# are SEPARATE host steps, not part of this script.
+#
+# Exit: 0 ok | 1 fatal
+#
+# CLI forms verified against Canonical MAAS docs (how-to-manage-networks):
+# vlans create $FABRIC name= vid= mtu= ; vlan update $FABRIC $VID space= ;
+# subnets create cidr= vlan= ; subnet update $CIDR key=value ;
+# spaces create name= ; ipranges create type=reserved subnet= start_ip= end_ip=
+
+set -euo pipefail
+shopt -s inherit_errexit 2>/dev/null || true
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=scripts/lib-net.sh
+. "$SCRIPT_DIR/lib-net.sh"
+
+MAAS_PROFILE="${MAAS_PROFILE:-admin}"
+FATAL=0
+fail() { echo "FAIL: $*" >&2; FATAL=$((FATAL+1)); }
+note() { echo "NOTE: $*"; }
+hdr() { echo; echo "=== $* ==="; }
+
+MODE="dryrun"; [ "${1:-}" = "--apply" ] && MODE="apply"
+need_jq || exit 1
+
+# ---- plane definition (constants; lib-net carries the shared CIDR/VID) ------
+PROVIDER_CIDR="10.12.4.0/22"  # fabric anchor: VID 104 lives on THIS fabric
+PVIP_CIDR="$PROVIDER_VIP_CIDR"  # 10.12.8.0/22
+PVIP_VID="$PROVIDER_VIP_VID"  # 104
+PVIP_SPACE="provider-vip"
+PVIP_GATEWAY="10.12.8.1"  # D-057 routed plane; set "" to omit (default-route watch-item)
+PVIP_RANGE_LO="10.12.8.2"
+PVIP_RANGE_HI="10.12.8.100"
+
+# ---- live resolvers (read-only; re-queried each call -- the plane mutates) --
+maas_q() { maas "$MAAS_PROFILE" "$@"; }
+prov_fabric() { maas_q subnets read | jq -r --arg c "$PROVIDER_CIDR" '.[]|select(.cidr==$c)|.vlan.fabric_id' | head -1; }
+subid_of() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|.id' | head -1; }
+subvid_of() { maas_q subnets read | jq -r --arg c "$1" '.[]|select(.cidr==$c)|.vlan.vid' | head -1; }
+sub_field() { maas_q subnets read | jq -r --arg c "$1" --arg f "$2" '.[]|select(.cidr==$c)|(.[$f] // "")' | head -1; }
+space_id() { maas_q spaces read | jq -r --arg n "$PVIP_SPACE" '.[]|select(.name==$n)|.id' | head -1; }
+metal_mtu() { maas_q subnets read | jq -r --arg c "$METAL_INTERNAL_CIDR" '.[]|select(.cidr==$c)|.vlan.mtu' | head -1; }
+metal_dns() { maas_q subnets read | jq -r --arg c "$METAL_INTERNAL_CIDR" '.[]|select(.cidr==$c)|(.dns_servers // []|join(","))' | head -1; }
+# parent_mtu: VID 104 is a child of enp1s0 (the PROVIDER untagged plane), so its MTU must
+# track the provider fabric -- NOT metal-internal (a child of a different fabric). A VLAN
+# MTU exceeding its parent's would break the interface.
+parent_mtu() { maas_q subnets read | jq -r --arg c "$PROVIDER_CIDR" '.[]|select(.cidr==$c)|.vlan.mtu' | head -1; }
+# vlan obj id of a vid on a fabric (empty if absent)
+vlanobj_on_fab() { maas_q vlans read "$1" | jq -r --arg v "$2" '.[]|select((.vid|tostring)==$v)|.id' | head -1; }
+# current space NAME of a vid on a fabric
+vlanspace_on_fab(){ maas_q vlans read "$1" | jq -r --arg v "$2" '.[]|select((.vid|tostring)==$v)|(.space // "")' | head -1; }
+
+# ---- mutation emitter (apply: runs maas, returns 1 on failure -> set -e aborts the script; dryrun: prints WOULD) ----
+emit() {
+  local desc="$1"; shift
+  if [ "$MODE" = "apply" ]; then
+    echo " DO: $desc"
+    local out
+    if ! out="$(maas "$MAAS_PROFILE" "$@" 2>&1)"; then
+      fail "$desc"
+      echo " MAAS said: $(printf '%s' "$out" | grep -viE '^(Success|Machine-readable)' | head -3 | tr '\n' ' ')" >&2
+      return 1
+    fi
+  else
+    echo " WOULD: $desc"
+    echo " maas $MAAS_PROFILE $*"
+  fi
+}
+
+hdr "provider-vip stand-up mode=$MODE ($PVIP_SPACE / $PVIP_CIDR / VID $PVIP_VID)"
+
+# ---- 0) resolve + guard the provider fabric --------------------------------
+PROV_FAB="$(prov_fabric)"
+[ -n "$PROV_FAB" ] || { fail "provider fabric not found (no MAAS subnet $PROVIDER_CIDR)"; exit 1; }
+note "provider fabric_id = $PROV_FAB (VID $PVIP_VID will be created here)"
+
+# guard: if the target subnet already exists, it MUST be on VID 104 (else misconfigured)
+if [ -n "$(subid_of "$PVIP_CIDR")" ]; then
+  GOTVID="$(subvid_of "$PVIP_CIDR")"
+  [ "$GOTVID" = "$PVIP_VID" ] || { fail "subnet $PVIP_CIDR exists on VID '$GOTVID', expected $PVIP_VID -- refusing to proceed"; exit 1; }
+fi
+
+# ---- 1) space --------------------------------------------------------------
+hdr "space $PVIP_SPACE"
+if [ -z "$(space_id)" ]; then
+  emit "create space $PVIP_SPACE" spaces create name="$PVIP_SPACE"
+else note "space $PVIP_SPACE exists -- SKIP create"; fi
+
+# ---- 2) VLAN VID 104 on the provider fabric --------------------------------
+hdr "VLAN VID $PVIP_VID on fabric $PROV_FAB"
+if [ -z "$(vlanobj_on_fab "$PROV_FAB" "$PVIP_VID")" ]; then
+  MTU="$(parent_mtu)"; { [ -n "$MTU" ] && [ "$MTU" != "null" ]; } || MTU="1500"
+  emit "create VLAN vid=$PVIP_VID name=$PVIP_SPACE mtu=$MTU on fabric $PROV_FAB" \
+    vlans create "$PROV_FAB" name="$PVIP_SPACE" vid="$PVIP_VID" mtu="$MTU"
+else note "VID $PVIP_VID already on fabric $PROV_FAB -- SKIP create"; fi
+
+# ---- 3) assign VLAN -> space (idempotent) ----------------------------------
+hdr "assign VID $PVIP_VID -> space $PVIP_SPACE"
+CURSPACE="$(vlanspace_on_fab "$PROV_FAB" "$PVIP_VID")"
+if [ "$CURSPACE" != "$PVIP_SPACE" ]; then
+  SID_SPACE="$(space_id)"; [ -n "$SID_SPACE" ] || SID_SPACE=""
+  emit "assign fabric $PROV_FAB vid $PVIP_VID -> space $PVIP_SPACE (id $SID_SPACE)" \
+    vlan update "$PROV_FAB" "$PVIP_VID" space="$SID_SPACE"
+else note "VID $PVIP_VID already in space $PVIP_SPACE -- SKIP"; fi
+
+# ---- 4) subnet + gateway/managed/dns ---------------------------------------
+hdr "subnet $PVIP_CIDR on VID $PVIP_VID"
+if [ -z "$(subid_of "$PVIP_CIDR")" ]; then
+  VOBJ="$(vlanobj_on_fab "$PROV_FAB" "$PVIP_VID")"; [ -n "$VOBJ" ] || VOBJ=""
+  emit "create subnet $PVIP_CIDR vlan=$VOBJ" subnets create cidr="$PVIP_CIDR" vlan="$VOBJ"
+else note "subnet $PVIP_CIDR exists -- SKIP create"; fi
+# gateway (routed plane); only if set and not already correct
+if [ -n "$PVIP_GATEWAY" ]; then
+  CURGW="$(sub_field "$PVIP_CIDR" gateway_ip)"
+  if [ "$CURGW" != "$PVIP_GATEWAY" ]; then
+    emit "subnet $PVIP_CIDR -> gateway_ip=$PVIP_GATEWAY" subnet update "$PVIP_CIDR" gateway_ip="$PVIP_GATEWAY"
+  else note "gateway_ip already $PVIP_GATEWAY -- SKIP"; fi
+fi
+# managed
+if [ "$(sub_field "$PVIP_CIDR" managed)" != "true" ]; then
+  emit "subnet $PVIP_CIDR -> managed=true" subnet update "$PVIP_CIDR" managed=true
+else note "subnet $PVIP_CIDR already managed -- SKIP"; fi
+# dns mirrored from metal-internal (only if metal has dns and ours differs)
+DNS="$(metal_dns)"
+if [ -n "$DNS" ] && [ "$DNS" != "null" ]; then
+  CURDNS="$(maas_q subnets read | jq -r --arg c "$PVIP_CIDR" '.[]|select(.cidr==$c)|(.dns_servers // []|join(","))' | head -1)"
+  if [ "$CURDNS" != "$DNS" ]; then
+    emit "subnet $PVIP_CIDR -> dns_servers=$DNS (mirrors metal-internal)" subnet update "$PVIP_CIDR" dns_servers="$DNS"
+  else note "dns_servers already $DNS -- SKIP"; fi
+else note "metal-internal has no dns_servers -- leaving $PVIP_CIDR dns unset"; fi
+
+# ---- 5) reserved VIP band --------------------------------------------------
+hdr "reserved VIP band $PVIP_RANGE_LO-$PVIP_RANGE_HI"
+if maas_q ipranges read | jq -e --arg lo "$PVIP_RANGE_LO" '.[]|select(.start_ip==$lo)' >/dev/null 2>&1; then
+  note "reserved range starting $PVIP_RANGE_LO exists -- SKIP"
+else
+  SUB="$(subid_of "$PVIP_CIDR")"; [ -n "$SUB" ] || SUB=""
+  emit "create reserved range $PVIP_RANGE_LO-$PVIP_RANGE_HI on subnet $SUB" \
+    ipranges create type=reserved subnet="$SUB" start_ip="$PVIP_RANGE_LO" end_ip="$PVIP_RANGE_HI" \
+    comment="provider-vip API VIP band (D-057)"
+fi
+
+# ---- result (jq -e exits non-zero when no subnet matches, so the dry-run note below can fire) ----
+hdr "resulting provider-vip subnet (live)"
+maas_q subnets read | jq -e -r --arg c "$PVIP_CIDR" '.[]|select(.cidr==$c)
+  |{cidr, vid:.vlan.vid, fabric:.vlan.fabric, space, managed, gateway_ip, dns_servers}' \
+  || note "(dry-run: plane not created yet -- the WOULD lines above are the plan)"
+
+[ "$FATAL" = 0 ] || { echo; echo "completed with $FATAL failure(s)"; exit 1; }
+echo; echo "OK ($MODE)"
diff --git a/tests/carve-host-interfaces/fakebin/maas b/tests/carve-host-interfaces/fakebin/maas
new file mode 100644
index 0000000..3a60979
--- /dev/null
+++ b/tests/carve-host-interfaces/fakebin/maas
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# fake maas: serves read endpoints from fixture files; dry-run never calls mutations.
+prof="${1:-}"; obj="${2:-}"; act="${3:-}"
+[ "$obj" = machines ] && [ "$act" = read ] && { cat "${FIX_MACHINES:?}"; exit 0; }
+[ "$obj" = machine ] && [ "$act" = read ] && { cat "${FIX_MACHINE:?}"; exit 0; }
+[ "$obj" = subnets ] && [ "$act" = read ] && { cat "${FIX_SUBNETS:?}"; exit 0; }
+[ "$obj" = interfaces ] && [ "$act" = read ] && { cat "${FIX_IFACES:?}"; exit 0; }
+echo "{}"; exit 0 # unexpected (mutations) -- never hit in dry-run
diff --git a/tests/carve-host-interfaces/fix/if_done.json b/tests/carve-host-interfaces/fix/if_done.json
new file mode 100644
index 0000000..26d50ef
--- /dev/null
+++ b/tests/carve-host-interfaces/fix/if_done.json
@@ -0,0 +1,11 @@
+[ {"name":"enp1s0","id":100,"vlan":{"id":5001},"links":[]},
+  {"name":"enp1s0.104","id":104,"vlan":{"id":5004},"links":[]},
+  {"name":"br-prov-api","id":300,"vlan":{"id":5004},"links":[{"id":906,"subnet":{"cidr":"10.12.8.0/22"}}]},
+  {"name":"enp7s0","id":107,"vlan":{"id":5002},"links":[]},
+  {"name":"br-metal","id":200,"vlan":{"id":5002},"links":[{"id":901,"subnet":{"cidr":"10.12.12.0/22"}}]},
+  {"name":"br-metal.103","id":201,"vlan":{"id":5003},"links":[]},
+  {"name":"br-internal","id":202,"vlan":{"id":5003},"links":[{"id":902,"subnet":{"cidr":"10.12.16.0/22"}}]},
+  {"name":"enp8s0","id":108,"vlan":{"id":5005},"links":[{"id":903,"subnet":{"cidr":"10.12.20.0/22"}}]},
+  {"name":"enp9s0","id":109,"vlan":{"id":5006},"links":[{"id":904,"subnet":{"cidr":"10.12.32.0/22"}}]},
+  {"name":"enp10s0","id":110,"vlan":{"id":5007},"links":[{"id":905,"subnet":{"cidr":"10.12.36.0/22"}}]},
+  {"name":"enp11s0","id":111,"vlan":{"id":5001},"links":[]} ]
diff --git a/tests/carve-host-interfaces/fix/if_fresh.json b/tests/carve-host-interfaces/fix/if_fresh.json
new file mode 100644
index 0000000..6a36bb4
--- /dev/null
+++ b/tests/carve-host-interfaces/fix/if_fresh.json
@@ -0,0 +1,9 @@
+[ {"name":"enp1s0","id":100,"vlan":{"id":5001},"links":[{"id":900,"subnet":{"cidr":"10.12.4.0/22"}}]},
+  {"name":"enp7s0","id":107,"vlan":{"id":5002},"links":[]},
+  {"name":"br-metal","id":200,"vlan":{"id":5002},"links":[{"id":901,"subnet":{"cidr":"10.12.12.0/22"}}]},
+  {"name":"br-metal.103","id":201,"vlan":{"id":5003},"links":[]},
+  {"name":"br-internal","id":202,"vlan":{"id":5003},"links":[{"id":902,"subnet":{"cidr":"10.12.16.0/22"}}]},
+  {"name":"enp8s0","id":108,"vlan":{"id":5005},"links":[{"id":903,"subnet":{"cidr":"10.12.20.0/22"}}]},
+  {"name":"enp9s0","id":109,"vlan":{"id":5006},"links":[{"id":904,"subnet":{"cidr":"10.12.32.0/22"}}]},
+  {"name":"enp10s0","id":110,"vlan":{"id":5007},"links":[{"id":905,"subnet":{"cidr":"10.12.36.0/22"}}]},
+  {"name":"enp11s0","id":111,"vlan":{"id":5001},"links":[]} ]
diff --git a/tests/carve-host-interfaces/fix/machine.json b/tests/carve-host-interfaces/fix/machine.json
new file mode 100644
index 0000000..2b7b974
--- /dev/null
+++ b/tests/carve-host-interfaces/fix/machine.json
@@ -0,0 +1 @@
+{"status_name":"Ready"}
diff --git a/tests/carve-host-interfaces/fix/machines.json b/tests/carve-host-interfaces/fix/machines.json
new file mode 100644
index 0000000..34def38
--- /dev/null
+++ b/tests/carve-host-interfaces/fix/machines.json
@@ -0,0 +1 @@
+[{"hostname":"openstack0","system_id":"node-os0"}]
diff --git a/tests/carve-host-interfaces/fix/sub_nopvip.json b/tests/carve-host-interfaces/fix/sub_nopvip.json
new file mode 100644
index 0000000..16b1669
--- /dev/null
+++ b/tests/carve-host-interfaces/fix/sub_nopvip.json
@@ -0,0 +1,50 @@
+[
+  {
+    "id": 1,
+    "cidr": "10.12.4.0/22",
+    "vlan": {
+      "id": 5001,
+      "vid": 0
+    }
+  },
+  {
+    "id": 2,
+    "cidr": "10.12.12.0/22",
+    "vlan": {
+      "id": 5002,
+      "vid": 0
+    }
+  },
+  {
+    "id": 3,
+    "cidr": "10.12.16.0/22",
+    "vlan": {
+      "id": 5003,
+      "vid": 103
+    }
+  },
+  {
+    "id": 5,
+    "cidr": "10.12.20.0/22",
+    "vlan": {
+      "id": 5005,
+      "vid": 0
+    }
+  },
+  {
+    "id": 6,
+    "cidr": "10.12.32.0/22",
+    "vlan": {
+      "id": 5006,
+      "vid": 0
+    }
+  },
+  {
+    "id": 7,
+    "cidr": "10.12.36.0/22",
+    "vlan": {
+      "id": 5007,
+      "vid": 0
+    }
+  }
+]
diff --git a/tests/carve-host-interfaces/fix/sub_ok.json b/tests/carve-host-interfaces/fix/sub_ok.json
new file mode 100644
index 0000000..ccc8236
--- /dev/null
+++ b/tests/carve-host-interfaces/fix/sub_ok.json
@@ -0,0 +1,7 @@
+[ {"id":1,"cidr":"10.12.4.0/22","vlan":{"id":5001,"vid":0}},
+  {"id":2,"cidr":"10.12.12.0/22","vlan":{"id":5002,"vid":0}},
+  {"id":3,"cidr":"10.12.16.0/22","vlan":{"id":5003,"vid":103}},
+  {"id":4,"cidr":"10.12.8.0/22","vlan":{"id":5004,"vid":104}},
+  {"id":5,"cidr":"10.12.20.0/22","vlan":{"id":5005,"vid":0}},
+  {"id":6,"cidr":"10.12.32.0/22","vlan":{"id":5006,"vid":0}},
+  {"id":7,"cidr":"10.12.36.0/22","vlan":{"id":5007,"vid":0}} ]
diff --git a/tests/carve-host-interfaces/fix/sub_wrongvid.json b/tests/carve-host-interfaces/fix/sub_wrongvid.json
new file mode 100644
index 0000000..ed3dfb9
--- /dev/null
+++ b/tests/carve-host-interfaces/fix/sub_wrongvid.json
@@ -0,0 +1,58 @@
+[
+  {
+    "id": 1,
+    "cidr": "10.12.4.0/22",
+    "vlan": {
+      "id": 5001,
+      "vid": 0
+    }
+  },
+  {
+    "id": 2,
+    "cidr": "10.12.12.0/22",
+    "vlan": {
+      "id": 5002,
+      "vid": 0
+    }
+  },
+  {
+    "id": 3,
+    "cidr": "10.12.16.0/22",
+    "vlan": {
+      "id": 5003,
+      "vid": 103
+    }
+  },
+  {
+    "id": 4,
+    "cidr": "10.12.8.0/22",
+    "vlan": {
+      "id": 5004,
+      "vid": 241
+    }
+  },
+  {
+    "id": 5,
+    "cidr": "10.12.20.0/22",
+    "vlan": {
+      "id": 5005,
+      "vid": 0
+    }
+  },
+  {
+    "id": 6,
+    "cidr": "10.12.32.0/22",
+    "vlan": {
+      "id": 5006,
+      "vid": 0
+    }
+  },
+  {
+    "id": 7,
+    "cidr": "10.12.36.0/22",
+    "vlan": {
+      "id": 5007,
+      "vid": 0
+    }
+  }
+]
diff --git a/tests/carve-host-interfaces/run-tests.sh b/tests/carve-host-interfaces/run-tests.sh
new file mode 100644
index 0000000..34bfbfa
--- /dev/null
+++ b/tests/carve-host-interfaces/run-tests.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# Behavior regression for carve-host-interfaces.sh D-057 provider-vip split.
+# Fake `maas` (read-only fixtures) + real jq. Drives the carve in DRY-RUN for openstack0
+# (octet .40) and asserts the emitted WOULD-actions (and absences).
+set -uo pipefail
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CARVE="$(cd "$HERE/../../scripts" && pwd)/carve-host-interfaces.sh"
+BIN="$HERE/fakebin"; FIX="$HERE/fix"
+chmod +x "$BIN"/* 2>/dev/null || true # GitHub Desktop lands files mode 100644
+command -v jq >/dev/null || { echo "FAIL: jq required"; exit 1; }
+rc_all=0; OUT="$(mktemp)"
+
+run() { #