#!/usr/bin/env python3 """Very small structural lint for reveal.js HTML decks. This is not a full HTML validator. It only checks explicit tag nesting for the tags that usually matter in hand-edited reveal.js decks and emits compact file:line:column messages suitable for agent context. """ from __future__ import annotations import argparse import sys from dataclasses import dataclass from html.parser import HTMLParser from pathlib import Path VOID_TAGS = { "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr", } OPTIONAL_CLOSE_TAGS = { "dd", "dt", "li", "option", "p", "tbody", "td", "tfoot", "th", "thead", "tr", } @dataclass class OpenTag: name: str line: int column: int class TagStackParser(HTMLParser): def __init__(self, filename: str, max_problems: int) -> None: super().__init__(convert_charrefs=False) self.filename = filename self.max_problems = max_problems self.stack: list[OpenTag] = [] self.problems: list[str] = [] def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: tag = tag.lower() if tag in VOID_TAGS or tag in OPTIONAL_CLOSE_TAGS: return line, column = self.getpos() self.stack.append(OpenTag(tag, line, column + 1)) def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: return def handle_endtag(self, tag: str) -> None: tag = tag.lower() if tag in VOID_TAGS or tag in OPTIONAL_CLOSE_TAGS: return line, column = self.getpos() column += 1 if not self.stack: self._problem(line, column, f"stray ") return if self.stack[-1].name == tag: self.stack.pop() return for index in range(len(self.stack) - 2, -1, -1): if self.stack[index].name == tag: expected = self.stack[-1] self._problem( line, column, f"closing before opened at " f"{expected.line}:{expected.column}", ) del self.stack[index:] return expected = self.stack[-1] self._problem( line, column, f"stray ; currently inside <{expected.name}> " f"opened at {expected.line}:{expected.column}", ) def close(self) -> None: super().close() for open_tag in reversed(self.stack): self._problem( open_tag.line, open_tag.column, f"unclosed <{open_tag.name}>", ) def _problem(self, line: int, column: int, message: str) -> None: if len(self.problems) >= self.max_problems: return self.problems.append(f"{self.filename}:{line}:{column}: {message}") def lint_file(path: Path, max_problems: int) -> list[str]: parser = TagStackParser(str(path), max_problems=max_problems) try: text = path.read_text(encoding="utf-8", errors="replace") except OSError as exc: return [f"{path}:0:0: {exc}"] parser.feed(text) parser.close() return parser.problems def main() -> int: argparser = argparse.ArgumentParser( description="Check basic tag nesting in reveal.js HTML decks." ) argparser.add_argument("html_files", nargs="+", type=Path) argparser.add_argument( "--max-problems", type=int, default=80, help="maximum number of problems to print across all files", ) argparser.add_argument( "--verbose", action="store_true", help="print an OK line for files with no reported problems", ) args = argparser.parse_args() printed = 0 checked = 0 problem_files = 0 for path in args.html_files: checked += 1 remaining = max(args.max_problems - printed, 0) problems = lint_file(path, max(remaining, 0)) if problems: problem_files += 1 for problem in problems: print(problem) printed += 1 elif args.verbose: print(f"{path}: OK") if printed >= args.max_problems: break if printed >= args.max_problems: print(f"stopped after {args.max_problems} problem(s); narrow the file set") print(f"checked {checked} file(s), found {printed} problem(s) in {problem_files} file(s)") return 1 if printed else 0 if __name__ == "__main__": raise SystemExit(main())