Newer
Older
openstack-caracal-ipv4 / scripts / repo_lint.py
@JANeumatrix JANeumatrix 13 hours ago 7 KB Patches
#!/usr/bin/env python3
"""
repo_lint.py -- static hygiene lint for the openstack-caracal-ipv4 repo (DOCFIX-074).

Read-only. Catches the drift classes that accumulated silently between the D-052
rebuild and the 2026-07-02 redeploy-readiness sweep (46 findings), so they are
caught at commit time instead of at the next redeploy:

  L1  encoding      non-ASCII / CR bytes in committed text (repo rule: ASCII+LF).
                    Carve-out: non-ASCII bytes in docs/design-decisions.md (legacy
                    D-001..018 em-dash style, documented) -> WARN with count, never
                    FAIL. The carve-out is per-file; CR bytes there still FAIL.
  L2  stale tokens  retired space names / CIDRs / VIP band in LIVE docs
                    (runbooks/, scripts/, bundle.yaml, README.md). Lines that
                    are explicitly historical (retired/STALE/superseded/D-058/
                    D-060/DOCFIX context) are exempt.
  L3  ghost refs    scripts/<name>.(sh|py) referenced in a runbook must exist.
  L4  deprecated    invoking a deprecated/retired script outside a deprecation
                    or historical context.
  L5  numbering     duplicate D- definition headings in docs/design-decisions.md
                    (collision guard); prints next-free D-/DOCFIX-/BUNDLEFIX- ids.
  L6  bare invoke   runbook lines executing scripts/*.sh without a bash/source
                    prefix (repo carries NO exec bits -- DOCFIX-069; bare form
                    fails "Permission denied" on a fresh clone).

Exit: 0 clean | 1 FAIL findings | 2 warnings only.  ASCII + LF.
Usage: python3 scripts/repo_lint.py [repo-root]
(marker: repo-lint: allow-stale-tokens -- this file names them by necessity)
"""
import re, sys, pathlib, collections

# ---- lint configuration (module-private; regexes compiled once) ----

# Suffixes treated as committed text by L1/L5 ("" = extension-less files).
_TEXT_EXTS = frozenset({".md", ".sh", ".py", ".yaml", ".yml", ""})
_SKIP_NAMES = frozenset({"overrides.zip"})
# Directory (or gitfile) names that are never linted, at any depth BELOW the root.
_SKIP_DIRS = frozenset({".git", "__pycache__"})
# The only file whose non-ASCII bytes are downgraded from FAIL to WARN.
_LEGACY_DOC = "docs/design-decisions.md"
# Per-file opt-out for guard scripts whose PURPOSE is naming stale tokens
# (fail-closed checks, deprecation lists).
_ALLOW_MARKER = "repo-lint: allow-stale-tokens"

_STALE = [
    (re.compile(r"\b(provider-vip|fabric-data)\b"), "retired space name"),
    (re.compile(r"\b(?<![\w.])lbaas\b(?!-)"), "retired lbaas space"),
    (re.compile(r"10\.12\.(20|24|60)\."), "D-058-era CIDR (never deployed)"),
    (re.compile(r"10\.12\.4\.2(2[4-9]|3[0-6])\b"), "pre-R14 VIP band"),
    (re.compile(r"jesse\.austin/openstack-caracal-ipv4"), "dead repo path"),
]
# A line matching this is treated as explicitly historical and skipped by L2.
_STALE_EXEMPT = re.compile(
    r"retired|stale|supersed|historical|deprecat|D-05[3-9]|D-060|DOCFIX|"
    r"must[- ]be[- ]absent|no provider-vip|renam|8_lbaas|ex-lbaas|old",
    re.IGNORECASE)

_DEPRECATED = ["phase-00-teardown.sh", "phase-00-maas-recidr.sh",
               "provider-vip-standup.sh", "d057-bundle-check.py",
               "review-bundle.py",
               "04a-capi-bootstrap-cluster", "05-magnum-capi-driver"]
# A runbook line matching this may name a deprecated script without an L4 FAIL.
_DEP_EXEMPT = re.compile(
    r"deprecat|retired|historical|git rm|DO NOT USE|absorbed|replac",
    re.IGNORECASE)
_REF_RX = re.compile(r"scripts/([a-z0-9_\-]+\.(?:sh|py))")
# scripts/*.sh before any '#', on a line that mentions no bash/source/python word.
_BARE_RX = re.compile(
    r"^(?!.*\b(?:bash|source|python3?)\b)[^#]*(?<![\w/.])(?:\./)?scripts/[a-z0-9_\-]+\.sh\b")

# NOTE(review): both identifier patterns only recognize numbers 000-099. Once a
# series reaches 100 the collision guard and the next-free hint stop seeing new
# entries; widen deliberately when that day comes.
_HEAD_RX = re.compile(r"(?m)^##+\s+(D-0\d{2})\b(?!.*AMENDMENT|.*RESOLVED)")
_IDENT_RX = re.compile(r"\b(D|DOCFIX|BUNDLEFIX)-(0\d{2})\b")


def _rel(path, root):
    """Return *path* relative to *root* with forward slashes on every OS."""
    return path.relative_to(root).as_posix()


def _read_text(path):
    """Decode *path* as UTF-8 (never the locale default), replacing bad bytes."""
    return path.read_text(encoding="utf-8", errors="replace")


def _collect_text_files(root):
    """Return the sorted text files under *root* that L1 and L5 inspect.

    Skip-directory names are matched against the path RELATIVE to *root*, so a
    checkout that itself lives below a directory called ``.git`` or
    ``__pycache__`` is still linted instead of silently passing.
    """
    found = []
    for path in sorted(root.rglob("*")):
        if not path.is_file():
            continue
        if _SKIP_DIRS.intersection(path.relative_to(root).parts):
            continue
        if path.suffix.lower() == ".zip" or path.name in _SKIP_NAMES:
            continue
        if path.suffix in _TEXT_EXTS or path.name == ".gitattributes":
            found.append(path)
    return found


def _live_docs(root):
    """Return the LIVE files scanned by L2: README, bundle, runbooks, scripts."""
    out = [root / "README.md", root / "bundle.yaml"]
    if (root / "runbooks").is_dir():
        out += sorted((root / "runbooks").glob("*.md"))
    if (root / "scripts").is_dir():
        out += sorted((root / "scripts").iterdir())
    return [p for p in out if p.is_file()]


def _lint_encoding(root, files, fails, warns):
    """L1: record CR bytes (FAIL) and non-ASCII bytes (FAIL, or WARN for the legacy doc)."""
    for path in files:
        data = path.read_bytes()
        rel = _rel(path, root)
        cr_count = data.count(b"\x0d")
        non_ascii = sum(1 for byte in data if byte > 127)
        if cr_count:
            fails.append("L1 %s: %d CR byte(s) (repo is LF-only)" % (rel, cr_count))
        if not non_ascii:
            continue
        if rel == _LEGACY_DOC:
            warns.append("L1 %s: %d non-ASCII byte(s) (legacy D-001..018 carve-out; "
                         "NEW entries must be ASCII)" % (rel, non_ascii))
        else:
            fails.append("L1 %s: %d non-ASCII byte(s) (repo rule: ASCII only)"
                         % (rel, non_ascii))


def _lint_stale_tokens(root, fails, warns):
    """L2: record retired names/CIDRs/paths found on non-exempt lines of live docs."""
    for path in _live_docs(root):
        rel = _rel(path, root)
        try:
            text = _read_text(path)
        except OSError as exc:
            # A file we cannot read is a file we did not lint: say so rather
            # than letting it drop out of the scan unnoticed.
            warns.append("L2 %s: unreadable, stale-token scan skipped (%s)" % (rel, exc))
            continue
        if _ALLOW_MARKER in text:
            continue
        for lineno, line in enumerate(text.splitlines(), 1):
            if _STALE_EXEMPT.search(line):
                continue
            for rx, why in _STALE:
                if rx.search(line):
                    fails.append("L2 %s:%d %s: %s" % (rel, lineno, why, line.strip()[:80]))


def _lint_runbooks(root, fails):
    """L3/L4/L6: ghost script refs, deprecated scripts, bare script invocations."""
    rb_dir = root / "runbooks"
    if not rb_dir.is_dir():
        return
    scripts_dir = root / "scripts"
    for path in sorted(rb_dir.glob("*.md")):
        if not path.is_file():
            continue  # a directory named *.md is not a runbook
        rel = _rel(path, root)
        in_code = False
        for lineno, line in enumerate(_read_text(path).splitlines(), 1):
            # Fence lines only toggle the code-block state; they are not linted.
            if line.strip().startswith("```"):
                in_code = not in_code
                continue
            for m in _REF_RX.finditer(line):
                if not (scripts_dir / m.group(1)).exists():
                    fails.append("L3 %s:%d references missing scripts/%s"
                                 % (rel, lineno, m.group(1)))
            if not _DEP_EXEMPT.search(line):
                fails.extend("L4 %s:%d references deprecated %s" % (rel, lineno, name)
                             for name in _DEPRECATED if name in line)
            # L6 applies inside fenced code blocks only.
            if in_code and _BARE_RX.search(line):
                fails.append("L6 %s:%d bare script invocation (no exec bits in repo; "
                             "use 'bash scripts/...'): %s" % (rel, lineno, line.strip()[:70]))


def _lint_numbering(root, files, fails):
    """L5: record duplicate D- headings; return the next-free identifier hint string."""
    heads = collections.Counter()
    doc = root / _LEGACY_DOC
    if doc.is_file():
        heads.update(m.group(1) for m in _HEAD_RX.finditer(_read_text(doc)))
    for ident, count in sorted(heads.items()):
        if count > 1:
            fails.append("L5 %s defined %d times in design-decisions.md (collision)"
                         % (ident, count))
    # Next-free = highest number mentioned anywhere in the linted text, plus one.
    used = collections.defaultdict(set)
    for path in files:
        for m in _IDENT_RX.finditer(_read_text(path)):
            used[m.group(1)].add(int(m.group(2)))
    return ", ".join("%s-%03d" % (series, max(nums) + 1)
                     for series, nums in sorted(used.items()) if nums)


def main(argv=None):
    """Run every lint class against a repo checkout and print the findings.

    Args:
        argv: optional argument list WITHOUT the program name; the first item
            is the repo root. Defaults to ``sys.argv[1:]``; the root defaults
            to the current directory.

    Returns:
        0 when clean, 1 when any FAIL finding exists (or the root is not a
        directory), 2 when there are warnings only.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    root = pathlib.Path(args[0] if args else ".").resolve()
    if not root.is_dir():
        # An absent root would otherwise scan nothing and report PASS.
        print("FAIL: repo root is not a directory: %s" % root)
        return 1

    fails, warns = [], []
    files = _collect_text_files(root)  # walked once, shared by L1 and L5

    # ---- L1 encoding ----
    _lint_encoding(root, files, fails, warns)
    # ---- L2 stale tokens in live docs ----
    _lint_stale_tokens(root, fails, warns)
    # ---- L3 ghost script refs / L4 deprecated / L6 bare invocation ----
    _lint_runbooks(root, fails)
    # ---- L5 identifier numbering ----
    next_free = _lint_numbering(root, files, fails)
    print("  [info] L5 next-free identifiers: %s" % (next_free or "n/a"))

    for w in warns:
        print("  [WARN] %s" % w)
    for f in fails:
        print("  [FAIL] %s" % f)
    verdict = "FAIL" if fails else ("WARN" if warns else "PASS")
    print("\n%s: repo lint (%d fail, %d warn)" % (verdict, len(fails), len(warns)))
    return 1 if fails else (2 if warns else 0)

# Script entry point: the lint verdict (0 clean / 1 FAIL / 2 warnings only)
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())