#!/usr/bin/env python3
"""
lint-voice.py — deterministic voice linter for Oleg's texts.

Checks for AI/corporate artifacts, wrong typography, and banned patterns
that make text sound like a model rather than Oleg.

Usage:
    python lint-voice.py "text to check"
    echo "text" | python lint-voice.py
    python lint-voice.py < input.txt

Exit codes:
    0 — clean (no ERRORs; WARNs alone do not fail)
    1 — ERRORs found (agent must fix and re-run)
    2 — usage error
"""

import re
import sys


# Rule table consumed by run_checks(): one (level, id, pattern, message) tuple per rule.
# level:   "ERROR" = must fix before delivery; "WARN" = check in context, may be OK
# id:      stable identifier printed in the report
# pattern: regular expression source (inline flags such as (?i)/(?m) come first)
# message: advice printed next to each match
#
# Typographic characters inside patterns are written as \uXXXX escapes so the
# rules keep matching the intended code points even if an editor or copy/paste
# step normalises curly quotes in this file to straight ones.
# Phrase patterns start with \b so they cannot match inside a longer word
# (e.g. "main conclusion" must not trigger the "in conclusion" rule).
CHECKS = [
    # ── Typography ────────────────────────────────────────────────────────────
    ("ERROR", "EM_DASH",
     r"\u2014",
     "Em-dash (—). Replace with colon, comma, or rephrase as separate sentence."),

    # U+201C, U+201D, U+201E, U+00AB, U+00BB; straight " is deliberately NOT matched.
    ("ERROR", "SMART_QUOTES",
     r"[\u201c\u201d\u201e\u00ab\u00bb]",
     'Smart/typographic double quote. Use straight " or rephrase.'),

    # U+2018, U+2019 and backtick; straight ' is deliberately NOT matched.
    ("WARN", "SMART_APOSTROPHE",
     r"[\u2018\u2019`]",
     "Smart/curly apostrophe or backtick. Use straight apostrophe '."),

    # ── AI / corporate words ──────────────────────────────────────────────────
    ("ERROR", "AI_LEVERAGE",
     r"(?i)\bleverag(e|es|ed|ing)\b",
     '"leverage" — AI/corporate word. Replace with plain verb (use, apply, build on).'),

    ("ERROR", "AI_ROBUST",
     r"(?i)\brobust\b",
     '"robust" — AI/corporate word. Replace with specific description.'),

    ("ERROR", "AI_SEAMLESS",
     r"(?i)\bseamless(ly)?\b",
     '"seamless" — AI/corporate word. Cut or replace with plain description.'),

    ("ERROR", "AI_PASSIONATE",
     r"(?i)\bpassionat(e|ely)\b|\bmy passion\b",
     '"passionate/passion" — AI/corporate word. Replace with direct statement.'),

    ("ERROR", "AI_THRILLED",
     r"(?i)\bthrilled\b",
     '"thrilled" — AI word. Replace with direct statement.'),

    ("ERROR", "AI_DELVE",
     r"(?i)\bdelv(e|es|ed|ing)\b",
     '"delve" — AI word. Replace with explore / look into / dig.'),

    ("ERROR", "AI_TAPESTRY",
     r"(?i)\btapestry\b",
     '"tapestry" — AI word. Remove or replace.'),

    ("ERROR", "AI_EMPOWER",
     r"(?i)\bempower(s|ed|ing)?\b",
     '"empower" — AI/corporate word. Replace with plain verb.'),

    ("ERROR", "AI_STREAMLINE",
     r"(?i)\bstreamline[ds]?\b|\bstreamlining\b",
     '"streamline" — AI/corporate word. Replace with plain verb.'),

    ("ERROR", "AI_ACTIONABLE",
     r"(?i)\bactionable\b",
     '"actionable" — AI/corporate word. Replace with plain description.'),

    ("ERROR", "AI_UNLOCK",
     r"(?i)\bunlock(s|ed|ing)?\b",
     '"unlock" — AI/corporate word. Replace with plain verb.'),

    ("ERROR", "AI_CUTTING_EDGE",
     r"(?i)cutting[- ]edge",
     '"cutting-edge" — AI/corporate phrase. Replace with specific description.'),

    ("WARN", "AI_INNOVATIVE",
     r"(?i)\binnovativ(e|ely)\b|\binnovation\b",
     '"innovative/innovation" — often AI/corporate filler. Replace with specific if used generically.'),

    # ── Sentence openers (capitalised opener form) ────────────────────────────
    # Pattern: word at line start OR after terminal punctuation + whitespace
    ("ERROR", "OPENER_FURTHERMORE",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Furthermore[,\s]",
     '"Furthermore" opener. Cut — connection should come from content, not connective.'),

    ("ERROR", "OPENER_MOREOVER",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Moreover[,\s]",
     '"Moreover" opener. Cut — restructure.'),

    ("ERROR", "OPENER_ADDITIONALLY",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Additionally[,\s]",
     '"Additionally" opener. Cut — restructure without connective.'),

    ("ERROR", "OPENER_ULTIMATELY",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Ultimately[,\s]",
     '"Ultimately" opener. Cut — restructure.'),

    ("ERROR", "OPENER_HOWEVER",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)However[,\s]",
     '"However" opener. Cut — use contrast from content, not a gluing connective.'),

    ("ERROR", "OPENER_IMPORTANTLY",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Importantly[,\s]",
     '"Importantly" opener — filler frame. Cut.'),

    ("ERROR", "OPENER_THAT_SAID",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)That said[,\s]",
     '"That said" opener. Cut — restructure.'),

    # Matches anywhere in a sentence (case-insensitive), but only as whole words.
    ("ERROR", "OPENER_IN_CONCLUSION",
     r"(?i)\bin conclusion[,\s]",
     '"In conclusion" — empty closing summary. Cut entirely.'),

    ("ERROR", "OPENER_OVERALL",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Overall[,\s]",
     '"Overall" opener — empty summary signal. Cut.'),

    ("WARN", "OPENER_SO_COMMA",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)So[,\s]",
     '"So," opener. Oleg cuts these — connection should come from content.'),

    # ── Filler frames ─────────────────────────────────────────────────────────
    # ['\u2019]? accepts a straight apostrophe, a curly one, or none at all.
    ("ERROR", "FILLER_WORTH_NOTING",
     r"(?i)\bit['\u2019]?s worth noting",
     '"It\'s worth noting" — filler frame. Cut, say the thing directly.'),

    ("ERROR", "FILLER_IMPORTANT_REMEMBER",
     r"(?i)\bit['\u2019]?s important to remember",
     '"It\'s important to remember" — filler frame. Cut.'),

    ("ERROR", "FILLER_I_AM_WRITING",
     r"(?i)\bI am writing to\b",
     '"I am writing to" — generic opener. Replace with direct lead.'),

    ("ERROR", "FILLER_IN_TODAYS",
     r"(?i)\bin today['\u2019]?s\b",
     '"In today\'s..." — AI/generic opener. Cut.'),

    # ── AI structural patterns ────────────────────────────────────────────────
    ("ERROR", "PATTERN_ITS_NOT_JUST",
     r"(?i)\bit['\u2019]?s not just",
     '"it\'s not just X, it\'s Y" construction. Rephrase as direct statement.'),

    ("WARN", "PATTERN_NOT_X_BUT",
     r"(?i)\bnot [\w]+(?:\s+[\w]+){0,3},?\s+but\b",
     '"not X, but Y" — often AI phrasing. Rephrase if rhetorical.'),

    ("ERROR", "PATTERN_HONEST_FRAMING",
     r"(?i)honest framing|one thing to flag|I want to be honest about",
     '"honest framing / one thing to flag" template. Not Oleg\'s pattern — remove.'),

    # ── AI closings ───────────────────────────────────────────────────────────
    ("ERROR", "CLOSING_LOOKING_FORWARD",
     r"(?i)\blooking forward to",
     '"Looking forward to" closing. Replace e.g. "Open to chat if it\'s a fit."'),

    ("ERROR", "CLOSING_HAPPY_TO",
     r"(?i)\bhappy to\b",
     '"happy to" — AI phrasing. Replace with plain statement.'),

    # ── Warn-only borderline cases ────────────────────────────────────────────
    ("WARN", "WARN_NAVIGATE",
     r"(?i)\bnavigat(e|es|ed|ing)\b",
     '"navigate" — check if metaphorical AI-speak ("navigate challenges"). OK if literal.'),

    ("WARN", "WARN_LANDSCAPE",
     r"(?i)\blandscape\b",
     '"landscape" — check if metaphorical AI-speak. OK if literal/technical.'),
]


def get_context(text: str, match: re.Match, window: int = 60) -> str:
    """Return a one-line excerpt of *text* surrounding *match*.

    The excerpt spans up to *window* characters on each side of the match,
    clamped to the bounds of *text*. Runs of whitespace (including newlines)
    are collapsed to single spaces, and an ellipsis is added on whichever
    side the excerpt was cut short of the full text.
    """
    lo = match.start() - window
    if lo < 0:
        lo = 0

    hi = match.end() + window
    if hi > len(text):
        hi = len(text)

    # split()/join flattens the excerpt so it prints on a single line.
    collapsed = " ".join(text[lo:hi].split())

    lead = "…" if lo > 0 else ""
    tail = "…" if hi < len(text) else ""
    return lead + collapsed + tail


def line_of(text: str, pos: int) -> int:
    """Return the 1-based line number of character offset *pos* in *text*."""
    # Each newline strictly before pos pushes the offset one line further down.
    newlines_before = text.count("\n", 0, pos)
    return newlines_before + 1


def run_checks(text: str) -> list[dict]:
    """Apply every rule in CHECKS to *text* and return the findings.

    Each finding is a dict with the keys "level", "id", "line", "context"
    and "message"; one finding is produced per regex match. The result is
    ordered by line number, then by rule id.
    """
    hits = [
        {
            "level": severity,
            "id": rule_id,
            "line": line_of(text, hit.start()),
            "context": get_context(text, hit),
            "message": advice,
        }
        for severity, rule_id, regex, advice in CHECKS
        for hit in re.finditer(regex, text)
    ]
    return sorted(hits, key=lambda item: (item["line"], item["id"]))


def main() -> None:
    """Command-line entry point: lint the supplied text and exit with a status.

    The text is taken from the command-line arguments (joined with single
    spaces) or, when there are none and stdin is not a terminal, from stdin.

    Exit status:
        2 when no text source is available (usage message on stderr),
        0 when the input is blank or contains no ERROR-level findings,
        1 when at least one ERROR-level finding is reported.
    """
    cli_words = sys.argv[1:]
    if cli_words:
        text = " ".join(cli_words)
    elif sys.stdin.isatty():
        # Interactive terminal and no arguments: nothing to lint.
        print("Usage: python lint-voice.py \"text\" OR pipe text via stdin", file=sys.stderr)
        sys.exit(2)
    else:
        text = sys.stdin.read()

    if not text.strip():
        print("✓ CLEAN — empty input.", file=sys.stderr)
        sys.exit(0)

    findings = run_checks(text)
    if not findings:
        print("✓ CLEAN — no issues found.")
        sys.exit(0)

    error_total = sum(1 for item in findings if item["level"] == "ERROR")
    warn_total = sum(1 for item in findings if item["level"] == "WARN")
    print(f"Found {len(findings)} issue(s): {error_total} ERROR, {warn_total} WARN\n")

    for item in findings:
        verb = "Fix" if item["level"] == "ERROR" else "Note"
        context = item["context"]
        print(f"[{item['level']}] {item['id']} — line {item['line']}")
        print(f'  Context: "{context}"')
        print(f"  {verb}: {item['message']}")
        print()

    print("---")
    # Only ERROR-level findings fail the run; warnings alone still exit 0.
    if not error_total:
        print("EXIT 0 — no errors (warnings only). Review WARNs if applicable.")
        sys.exit(0)
    print(f"EXIT 1 — {error_total} error(s) present. Fix and re-run.")
    sys.exit(1)


# Script entry point: run the CLI only when this file is executed directly.
if __name__ == "__main__":
    main()