#!/usr/bin/env python3
"""
repo_lint.py -- static hygiene lint for the openstack-caracal-ipv4 repo (DOCFIX-074).
Read-only. Catches the drift classes that accumulated silently between the D-052
rebuild and the 2026-07-02 redeploy-readiness sweep (46 findings), so they are
caught at commit time instead of at the next redeploy:
L1 encoding non-ASCII / CR bytes in committed text (repo rule: ASCII+LF).
Carve-out: docs/design-decisions.md legacy D-001..018 region
(em-dash style documented) -> WARN with count, never FAIL.
L2 stale tokens retired space names / CIDRs / VIP band in LIVE docs
(runbooks/, scripts/, bundle.yaml, README.md). Lines that
are explicitly historical (retired/STALE/superseded/D-058/
D-060/DOCFIX context) are exempt.
L3 ghost refs scripts/<name>.(sh|py) referenced in a runbook must exist.
L4 deprecated invoking a deprecated/retired script outside a deprecation
or historical context.
L5 numbering duplicate D-/DOCFIX-/BUNDLEFIX- definition headings in the
decision/changelog docs (collision guard); prints next-free.
L6 bare invoke runbook lines executing scripts/*.sh without a bash/source
prefix (repo carries NO exec bits -- DOCFIX-069; bare form
fails "Permission denied" on a fresh clone).
Exit: 0 clean | 1 FAIL findings | 2 warnings only. ASCII + LF.
Usage: python3 scripts/repo_lint.py [repo-root]
(marker: repo-lint: allow-stale-tokens -- this file names them by necessity)
"""
import re, sys, pathlib, collections
# ---- lint tables (module-private; shared by the check helpers below) ----

# L2: (pattern, reason) pairs for tokens that must not appear in live docs.
_STALE = [
    (re.compile(r"\b(provider-vip|fabric-data)\b"), "retired space name"),
    (re.compile(r"\b(?<![\w.])lbaas\b(?!-)"), "retired lbaas space"),
    (re.compile(r"10\.12\.(20|24|60)\."), "D-058-era CIDR (never deployed)"),
    (re.compile(r"10\.12\.4\.2(2[4-9]|3[0-6])\b"), "pre-R14 VIP band"),
    (re.compile(r"jesse\.austin/openstack-caracal-ipv4"), "dead repo path"),
]

# L2: a line matching this is treated as explicitly historical and skipped.
# NOTE(review): the trailing bare "old" alternative is an unanchored,
# case-insensitive substring, so it also exempts lines that merely contain
# words such as "folder" or "threshold". Left unchanged because tightening it
# could surface new findings; confirm whether a word-boundary form was meant.
_EXEMPT = re.compile(r"retired|stale|supersed|historical|deprecat|D-05[3-9]|D-060|DOCFIX|"
                     r"must[- ]be[- ]absent|no provider-vip|renam|8_lbaas|ex-lbaas|old",
                     re.IGNORECASE)

# L4: names (plain substrings) whose appearance in a runbook line is a finding.
_DEPRECATED = ["phase-00-teardown.sh", "phase-00-maas-recidr.sh",
               "provider-vip-standup.sh", "d057-bundle-check.py",
               "review-bundle.py",
               "04a-capi-bootstrap-cluster", "05-magnum-capi-driver"]

# L4: a runbook line matching this is a deprecation/historical context.
_DEP_EXEMPT = re.compile(r"deprecat|retired|historical|git rm|DO NOT USE|absorbed|replac",
                         re.IGNORECASE)

# L3: captures <name>.(sh|py) from a scripts/<name> reference.
_REF_RX = re.compile(r"scripts/([a-z0-9_\-]+\.(?:sh|py))")

# L6: a scripts/*.sh path on a line that carries no bash/source/python word
# and has no '#' before the path.
_BARE_RX = re.compile(r"^(?!.*\b(?:bash|source|python3?)\b)[^#]*(?<![\w/.])(?:\./)?scripts/[a-z0-9_\-]+\.sh\b")

# L5: decision headings and identifier mentions. Any three digits are
# accepted (not only a leading zero) so the collision guard and the
# next-free report keep working once a series passes 099.
_HEAD_RX = re.compile(r"(?m)^##+\s+(D-\d{3})\b(?!.*AMENDMENT|.*RESOLVED)")
_IDENT_RX = re.compile(r"\b(D|DOCFIX|BUNDLEFIX)-(\d{3})\b")


def _live_docs(root):
    """Return the existing files that the L2 stale-token check scans.

    That is README.md and bundle.yaml at the repo root, every runbooks/*.md,
    and every direct entry of scripts/; entries that are not regular files
    are dropped.
    """
    candidates = [root / "README.md", root / "bundle.yaml"]
    runbooks = root / "runbooks"
    if runbooks.is_dir():
        candidates += sorted(runbooks.glob("*.md"))
    scripts = root / "scripts"
    if scripts.is_dir():
        candidates += sorted(scripts.iterdir())
    return [p for p in candidates if p.is_file()]


def _all_text(root):
    """Return the sorted list of text files under root used by L1 and L5.

    Files inside a .git or __pycache__ directory, *.zip files and
    overrides.zip are skipped. The directory test uses the path RELATIVE to
    root, so a checkout that itself lives beneath a directory with one of
    those names is still scanned.
    """
    exts = {".md", ".sh", ".py", ".yaml", ".yml", ""}
    skip_names = {"overrides.zip"}
    found = []
    # Sorted so findings are reported in a stable order between runs.
    for p in sorted(root.rglob("*")):
        if not p.is_file():
            continue
        rel_parts = p.relative_to(root).parts
        if ".git" in rel_parts or "__pycache__" in rel_parts:
            continue
        if p.suffix.lower() == ".zip" or p.name in skip_names:
            continue
        if p.suffix in exts or p.name == ".gitattributes":
            found.append(p)
    return found


def _check_encoding(root, files, fails, warns):
    """L1: record CR bytes and non-ASCII bytes found in each file.

    CR bytes always FAIL. Non-ASCII bytes FAIL except in
    docs/design-decisions.md, where they are reported as a WARN with a count.
    """
    for p in files:
        data = p.read_bytes()
        # as_posix() keeps the carve-out comparison below valid on platforms
        # whose native separator is not "/".
        rel = p.relative_to(root).as_posix()
        cr = data.count(b"\x0d")
        na = sum(1 for b in data if b > 127)
        if cr:
            fails.append("L1 %s: %d CR byte(s) (repo is LF-only)" % (rel, cr))
        if not na:
            continue
        if rel == "docs/design-decisions.md":
            warns.append("L1 %s: %d non-ASCII byte(s) (legacy D-001..018 carve-out; "
                         "NEW entries must be ASCII)" % (rel, na))
        else:
            fails.append("L1 %s: %d non-ASCII byte(s) (repo rule: ASCII only)" % (rel, na))


def _check_stale_tokens(root, fails):
    """L2: record stale tokens found on non-exempt lines of the live docs."""
    for p in _live_docs(root):
        try:
            # Explicit UTF-8 so decoding and line splitting do not depend on
            # the locale of the machine running the lint.
            txt = p.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Best effort: an unreadable file is skipped, not fatal.
            continue
        # explicit per-file opt-out for guard scripts whose PURPOSE is naming
        # stale tokens (fail-closed checks, deprecation lists):
        if "repo-lint: allow-stale-tokens" in txt:
            continue
        rel = p.relative_to(root).as_posix()
        for i, ln in enumerate(txt.splitlines(), 1):
            if _EXEMPT.search(ln):
                continue
            for rx, why in _STALE:
                if rx.search(ln):
                    fails.append("L2 %s:%d %s: %s" % (rel, i, why, ln.strip()[:80]))


def _check_runbooks(root, fails):
    """L3/L4/L6: scan runbooks/*.md line by line.

    L3 flags scripts/<name> references whose file does not exist, L4 flags
    deprecated names outside a deprecation context, and L6 flags bare
    scripts/*.sh invocations inside fenced code blocks. Fence lines only
    toggle the in-code state and are not checked themselves.
    """
    rb_dir = root / "runbooks"
    if not rb_dir.is_dir():
        return
    for p in sorted(rb_dir.glob("*.md")):
        rel = p.relative_to(root).as_posix()
        in_code = False
        text = p.read_text(encoding="utf-8", errors="replace")
        for i, ln in enumerate(text.splitlines(), 1):
            if ln.strip().startswith("```"):
                in_code = not in_code
                continue
            for m in _REF_RX.finditer(ln):
                if not (root / "scripts" / m.group(1)).exists():
                    fails.append("L3 %s:%d references missing scripts/%s" % (rel, i, m.group(1)))
            if not _DEP_EXEMPT.search(ln):
                for d in _DEPRECATED:
                    if d in ln:
                        fails.append("L4 %s:%d references deprecated %s" % (rel, i, d))
            if in_code and _BARE_RX.search(ln):
                fails.append("L6 %s:%d bare script invocation (no exec bits in repo; "
                             "use 'bash scripts/...'): %s" % (rel, i, ln.strip()[:70]))


def _check_numbering(root, files, fails):
    """L5: record duplicate decision headings; return the next-free summary.

    Headings are counted in docs/design-decisions.md only; heading lines that
    mention AMENDMENT or RESOLVED are not counted as definitions. The
    returned string lists, per identifier prefix seen anywhere in files, the
    highest number plus one; it is empty when no identifier was seen.
    """
    doc = root / "docs" / "design-decisions.md"
    if doc.is_file():
        text = doc.read_text(encoding="utf-8", errors="replace")
        heads = collections.Counter(m.group(1) for m in _HEAD_RX.finditer(text))
        for ident, n in sorted(heads.items()):
            if n > 1:
                fails.append("L5 %s defined %d times in design-decisions.md (collision)" % (ident, n))
    used = collections.defaultdict(set)
    for p in files:
        for m in _IDENT_RX.finditer(p.read_text(encoding="utf-8", errors="replace")):
            used[m.group(1)].add(int(m.group(2)))
    return ", ".join("%s-%03d" % (k, max(v) + 1) for k, v in sorted(used.items()) if v)


def main():
    """Run every lint check against the repo root and print the report.

    The root is sys.argv[1] when given, otherwise the current directory.
    Nothing in the repo is modified.

    Returns:
        0 when clean, 1 when there is at least one FAIL finding, 2 when
        there are only warnings.
    """
    root = pathlib.Path(sys.argv[1] if len(sys.argv) > 1 else ".").resolve()
    fails, warns = [], []

    # Walk the tree once; both L1 and L5 consume the same file list.
    text_files = _all_text(root)

    _check_encoding(root, text_files, fails, warns)
    _check_stale_tokens(root, fails)
    _check_runbooks(root, fails)
    next_free = _check_numbering(root, text_files, fails)

    print(" [info] L5 next-free identifiers: %s" % (next_free or "n/a"))
    for w in warns:
        print(" [WARN] %s" % w)
    for f in fails:
        print(" [FAIL] %s" % f)
    verdict = "FAIL" if fails else ("WARN" if warns else "PASS")
    print("\n%s: repo lint (%d fail, %d warn)" % (verdict, len(fails), len(warns)))
    return 1 if fails else (2 if warns else 0)
# Script entry point: hand main()'s status (0 clean, 1 FAIL, 2 warnings only)
# to the interpreter as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())