#!/usr/bin/env bash
# scripts/phase-06-capi-stack.sh
#
# Phase-06 Step 6.6 (a-f) encapsulated (D-056). Runs on the jumphost; installs the
# CAPI provider stack ON the mgmt VM (all helm/clusterctl/kubectl run VM-side
# against the local apiserver -- matched 1.32.13 kubectl, no jumphost skew).
#
# HARDENED ORDER (D-034 install-ordering): pins -> cert-manager -> ORC ->
# clusterctl init -> CAAPH -> janitor -> verify. ORC precedes `clusterctl init`
# because CAPO's openstackserver controller hard-depends on ORC's
# Image.openstack.k-orc.cloud CRD; installing CAPO first crash-loops until ORC lands.
#
# Versions are READ from the chart tag's dependencies.json at runtime (D-034;
# NEVER hardcoded). The as-built cross-check (CAPI v1.13.2 / CAPO v0.14.4 /
# CERT v1.20.2 / ORC v2.5.0 / CAAPH 0.12.0 / JANITOR 0.11.0 / HELM v3.17.3) is
# informational only. KUBECTL_VERSION tracks the cluster's k8s (the CHANNEL in
# phase-06-k8s-bootstrap.sh); keep them in step.
#
# Each sub-step is gated on the remote block's own exit status (its `--wait` /
# `wait` / `get crd` fail the remote, ssh propagates non-zero, we stop). DOCFIX-021:
# not needed here (no interactive `sudo`; blocks are non-interactive helm/kubectl).
#
# Tunables via env: ENVFILE SSH_KEY CHART_TAG KUBECTL_VERSION
# Requires: jumphost; ssh + the VM key. (jq/curl are installed VM-side by 6.6a.)
# Usage:  bash scripts/phase-06-capi-stack.sh
# Exit:   0 stack up + verified | 1 a sub-step gate failed | 2 precondition
# ASCII + LF.

# Strict mode. Not every bash offers inherit_errexit, so a failure to enable it
# is deliberately ignored.
set -o errexit -o nounset -o pipefail
shopt -s inherit_errexit 2>/dev/null || true

# Env tunables: keep a non-empty caller-supplied value, otherwise use the default.
: "${ENVFILE:=$HOME/capi-mgmt-net.env}"
: "${SSH_KEY:=$HOME/.ssh/id_ed25519}"
: "${CHART_TAG:=0.25.1}"
: "${KUBECTL_VERSION:=v1.32.13}"

# Preconditions -- every miss is reported on stderr and ends the script with 2.
if ! command -v ssh >/dev/null 2>&1; then
  echo "FAIL: ssh not found" >&2
  exit 2
fi
if [[ ! -f "$ENVFILE" ]]; then
  echo "FAIL: $ENVFILE not found (run phase-06-mgmt-vm.sh first)" >&2
  exit 2
fi
# The env file is sourced into this shell; MGMT_FIP is the one value required
# from it below.
# shellcheck disable=SC1090
source "$ENVFILE"
if [[ -z "${MGMT_FIP:-}" ]]; then
  echo "FAIL: MGMT_FIP unset in $ENVFILE" >&2
  exit 2
fi
if [[ ! -f "$SSH_KEY" ]]; then
  echo "FAIL: ssh key $SSH_KEY not found" >&2
  exit 2
fi

# ssh target and the option set shared by every run_step call.
MGMT_VM=$MGMT_FIP
SSH_OPTS=(
  -i "$SSH_KEY"
  -o BatchMode=yes
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
  -o ConnectTimeout=10
)

# run_step LABEL [ARG...] -- run one gated sub-step on the mgmt VM.
#
# Reads the remote script from stdin and feeds it to `bash -s` on
# ubuntu@$MGMT_VM. Remote stdout+stderr are merged and echoed indented by two
# spaces.
#
# Globals:   SSH_OPTS (read), MGMT_VM (read)
# Arguments: $1   - label printed in the banner and in the gate messages
#            $2.. - positional args handed to the remote `bash -s` ($1, $2, ...)
# Outputs:   banner, indented remote output and "[OK] LABEL" on stdout;
#            "GATE FAIL: ..." on stderr
# Returns:   0 when the remote block exited 0; otherwise EXITS the script with 1.
run_step() {
  local label="$1"; shift

  # ssh joins its command words with spaces and the remote shell re-parses the
  # result, so each arg is shell-quoted here to arrive as exactly one word.
  local remote_cmd='bash -s'
  local arg
  for arg in "$@"; do
    remote_cmd+=" $(printf '%q' "$arg")"
  done

  echo "=== $label ==="

  # The pipeline runs as an `if` condition on purpose: there errexit does not
  # apply, so a failing remote block cannot abort the script before the gate
  # below has reported it. PIPESTATUS is copied in both branches (the first
  # command of either branch still sees the pipeline's statuses), which keeps
  # the gate independent of `pipefail`.
  local -a stage_rc
  if ssh "${SSH_OPTS[@]}" ubuntu@"$MGMT_VM" "$remote_cmd" 2>&1 | sed 's/^/  /'; then
    stage_rc=("${PIPESTATUS[@]}")
  else
    stage_rc=("${PIPESTATUS[@]}")
  fi

  local rc=${stage_rc[0]}
  [ "$rc" -eq 0 ] || { echo "GATE FAIL: $label (remote rc=$rc)" >&2; exit 1; }
  # A broken local indent filter means output may have been lost; do not report OK.
  [ "${stage_rc[1]:-0}" -eq 0 ] || { echo "GATE FAIL: $label (local output filter rc=${stage_rc[1]})" >&2; exit 1; }
  echo "[OK] $label"
}

# --- 6.6a: tooling + pins (read dependencies.json @ CHART_TAG) ---
# Remote block (args: $1 = chart tag, $2 = kubectl version). In order it:
#   1. installs jq + curl via apt,
#   2. writes the local apiserver kubeconfig to ~/.kube/config,
#   3. prints an informational egress check,
#   4. downloads dependencies.json for the chart tag and resolves the seven
#      version pins into ~/capi-pins.env (sourced by the later sub-steps),
#   5. gates on every pin being non-empty and not "null",
#   6. installs helm, clusterctl and kubectl into /usr/local/bin.
# All sudo calls read from /dev/null so they cannot consume the script on stdin.
run_step "6.6a tooling + pins (chart $CHART_TAG, kubectl $KUBECTL_VERSION)" "$CHART_TAG" "$KUBECTL_VERSION" <<'REOF'
set -euo pipefail
TAG="$1"; KVER="$2"
# Two statements on purpose: inside an `a && b` list a failing `a` does not trip
# errexit, so a failed `apt-get update` would otherwise be skipped over silently.
sudo apt-get update -qq </dev/null
sudo apt-get install -y jq curl </dev/null

# private scratch dir for the downloads (unpredictable name, removed on exit)
WORK=$(mktemp -d)
trap 'rm -rf -- "$WORK"' EXIT

# kubeconfig for the local apiserver (VM's own tenant IP), readable by ubuntu.
# Created under umask 077 so a fresh file is never group/world-readable; the
# chmod still tightens a pre-existing file.
mkdir -p "$HOME/.kube"
( umask 077; sudo k8s config </dev/null > "$HOME/.kube/config" )
chmod 600 "$HOME/.kube/config"

# egress pre-check (informational; a 404 at a host root still proves reachability).
# Time-bounded so an unreachable host cannot stall the step.
for h in https://raw.githubusercontent.com https://get.helm.sh https://github.com https://dl.k8s.io; do
  printf '%s -> ' "$h"
  curl -s -o /dev/null --connect-timeout 5 --max-time 15 -w '%{http_code}\n' "$h" || echo FAIL
done

# version constellation from the chart tag's dependencies.json (D-034)
curl -fsSL "https://raw.githubusercontent.com/azimuth-cloud/capi-helm-charts/${TAG}/dependencies.json" -o "$HOME/deps.json"
CAPI=$(jq -r '."cluster-api"' "$HOME/deps.json")
CAPO=$(jq -r '."cluster-api-provider-openstack"' "$HOME/deps.json")
CERT=$(jq -r '."cert-manager"' "$HOME/deps.json")
ORC=$(jq -r '."openstack-resource-controller"' "$HOME/deps.json")
CAAPH=$(jq -r '."addon-provider"' "$HOME/deps.json")
JANITOR=$(jq -r '."cluster-api-janitor-openstack"' "$HOME/deps.json")
HELM=$(jq -r '.helm' "$HOME/deps.json")
# The pins file is `source`d by the later sub-steps, so each downloaded value is
# shell-quoted (%q); ordinary version strings are written unchanged.
for k in CAPI CAPO CERT ORC CAAPH JANITOR HELM; do
  printf '%s=%q\n' "$k" "${!k}"
done > "$HOME/capi-pins.env"
echo "== pins (cross-check: CAPI v1.13.2 CAPO v0.14.4 CERT v1.20.2 ORC v2.5.0 CAAPH 0.12.0 JANITOR 0.11.0 HELM v3.17.3) =="
cat "$HOME/capi-pins.env"
# gate: every pin resolved (non-empty, non-null) -- a moved/renamed key must fail loud
for k in CAPI CAPO CERT ORC CAAPH JANITOR HELM; do v="${!k}"; [ -n "$v" ] && [ "$v" != null ] || { echo "PIN-FAIL: $k=$v" >&2; exit 1; }; done

curl -fsSL "https://get.helm.sh/helm-${HELM}-linux-amd64.tar.gz" -o "$WORK/helm.tgz"
sudo tar -xzf "$WORK/helm.tgz" -C /usr/local/bin --strip-components=1 linux-amd64/helm </dev/null
curl -fsSL "https://github.com/kubernetes-sigs/cluster-api/releases/download/${CAPI}/clusterctl-linux-amd64" -o "$WORK/clusterctl"
sudo install -m 0755 "$WORK/clusterctl" /usr/local/bin/clusterctl </dev/null
curl -fsSL "https://dl.k8s.io/release/${KVER}/bin/linux/amd64/kubectl" -o "$WORK/kubectl"
sudo install -m 0755 "$WORK/kubectl" /usr/local/bin/kubectl </dev/null
echo "== tooling =="; helm version --short; clusterctl version
# sed reads its whole input (unlike `head -1`), so kubectl cannot be killed by
# SIGPIPE and fail the step under pipefail.
kubectl version --client 2>/dev/null | sed -n '1p'
REOF

# --- 6.6b: cert-manager (DOCFIX-025a: crds.enabled=true) ---
# Remote block: sources ~/capi-pins.env for $CERT, adds and refreshes the
# jetstack helm repo, then `helm upgrade --install`s cert-manager at that version
# into the cert-manager namespace (created if missing) with crds.enabled=true.
# The step is gated twice: by helm's own --wait (5m timeout) and by a kubectl
# wait for every Deployment in the namespace to report Available (180s). The
# pod listing at the end is printed for the operator.
run_step "6.6b cert-manager" <<'REOF'
set -euo pipefail
source "$HOME/capi-pins.env"
helm repo add jetstack https://charts.jetstack.io
helm repo update
helm upgrade --install cert-manager jetstack/cert-manager \
  --namespace cert-manager --create-namespace \
  --version "$CERT" --set crds.enabled=true --wait --timeout 5m
kubectl -n cert-manager wait --for=condition=Available deploy --all --timeout=180s
kubectl -n cert-manager get pods
REOF

# --- 6.6c: ORC (BEFORE clusterctl init) ---
# Remote block: sources ~/capi-pins.env for $ORC and server-side applies the
# install.yaml of that ORC release. It then waits (180s) for every Deployment in
# orc-system to report Available. The closing `kubectl get crd` is the gate for
# the images.openstack.k-orc.cloud CRD: if the lookup fails, the remote block
# exits non-zero under `set -e` and run_step stops the script.
run_step "6.6c ORC (before clusterctl init)" <<'REOF'
set -euo pipefail
source "$HOME/capi-pins.env"
kubectl apply --server-side -f \
  "https://github.com/k-orc/openstack-resource-controller/releases/download/${ORC}/install.yaml"
kubectl -n orc-system wait --for=condition=Available deploy --all --timeout=180s
kubectl get crd images.openstack.k-orc.cloud
REOF

# --- 6.6d: clusterctl init ---
# Remote block: sources ~/capi-pins.env and runs `clusterctl init` with the core,
# kubeadm bootstrap and kubeadm control-plane providers all pinned to $CAPI and
# the openstack infrastructure provider pinned to $CAPO. It then waits (240s per
# namespace) for every Deployment in the four provider namespaces to report
# Available; the first namespace that times out fails the step.
# NOTE(review): the other sub-steps use `helm upgrade --install` / `kubectl
# apply`; `clusterctl init` may refuse to run when these providers are already
# installed -- confirm re-run behaviour before treating this script as idempotent.
run_step "6.6d clusterctl init" <<'REOF'
set -euo pipefail
source "$HOME/capi-pins.env"
clusterctl init \
  --core "cluster-api:${CAPI}" \
  --bootstrap "kubeadm:${CAPI}" \
  --control-plane "kubeadm:${CAPI}" \
  --infrastructure "openstack:${CAPO}"
for ns in capi-system capi-kubeadm-bootstrap-system capi-kubeadm-control-plane-system capo-system; do
  echo "== $ns =="; kubectl -n "$ns" wait --for=condition=Available deploy --all --timeout=240s
done
REOF

# --- 6.6e: CAAPH + janitor ---
# Remote block: sources ~/capi-pins.env, registers and refreshes the two
# azimuth-cloud helm repos, then `helm upgrade --install`s the addon provider at
# $CAAPH into capi-addon-system and the OpenStack janitor at $JANITOR into
# capi-janitor-system (namespaces created if missing). Each release is gated by
# helm's --wait with a 5m timeout. The two pod listings at the end are printed
# for the operator.
run_step "6.6e CAAPH + janitor" <<'REOF'
set -euo pipefail
source "$HOME/capi-pins.env"
helm repo add capi-addon   https://azimuth-cloud.github.io/cluster-api-addon-provider
helm repo add capi-janitor https://azimuth-cloud.github.io/cluster-api-janitor-openstack
helm repo update
helm upgrade --install cluster-api-addon-provider capi-addon/cluster-api-addon-provider \
  --namespace capi-addon-system --create-namespace --version "$CAAPH" --wait --timeout 5m
helm upgrade --install cluster-api-janitor-openstack capi-janitor/cluster-api-janitor-openstack \
  --namespace capi-janitor-system --create-namespace --version "$JANITOR" --wait --timeout 5m
kubectl -n capi-addon-system   get pods
kubectl -n capi-janitor-system get pods
REOF

# --- 6.6f: verify the stack (EXIT GATE) ---
# Remote block:
#   * prints the clusterctl version and every pod line matching the stack's
#     name pattern (display only -- a grep with no match is tolerated there);
#   * collects "namespace/name status" for each matching pod whose 4th column is
#     not exactly "Running"; a non-empty list is printed under NOT-RUNNING and
#     fails the step (presumably column 4 is STATUS in `get pods -A
#     --no-headers` output; the READY column is not inspected);
#   * requires the four key CRDs to exist (`kubectl get crd` fails otherwise).
# NOTE(review): the `notready=` pipeline runs under pipefail, so a grep that
# matches no pod at all makes the assignment fail and stops the step without a
# NOT-RUNNING message -- confirm that is the intended outcome.
run_step "6.6f verify stack (all controllers Running + key CRDs)" <<'REOF'
set -euo pipefail
clusterctl version
echo "== controllers =="
kubectl get pods -A | grep -E 'capi-|capo-|cert-manager|orc-system|janitor|addon' || true
notready=$(kubectl get pods -A --no-headers 2>/dev/null \
  | grep -E 'capi-|capo-|cert-manager|orc-system|janitor|addon' \
  | awk '$4!="Running"{print $1"/"$2" "$4}')
if [ -n "$notready" ]; then echo "NOT-RUNNING:"; echo "$notready"; exit 1; fi
echo "== key CRDs =="
kubectl get crd clusters.cluster.x-k8s.io \
  openstackclusters.infrastructure.cluster.x-k8s.io \
  kubeadmcontrolplanes.controlplane.cluster.x-k8s.io \
  images.openstack.k-orc.cloud
echo "STACK: OK"
REOF

# Reached only when every sub-step gate passed (run_step ends the script on a
# failed gate).
echo "Summary: CAPI provider stack installed + verified on the mgmt VM (chart $CHART_TAG pins; ORC-before-init order). Phase-06 complete."
