cv-2026/.claude/skills/write-as-oleg/lint-voice.py

255 lines
9.4 KiB
Python

#!/usr/bin/env python3
"""
lint-voice.py — deterministic voice linter for Oleg's texts.
Checks for AI/corporate artifacts, wrong typography, and banned patterns
that make text sound like a model rather than Oleg.
Usage:
python lint-voice.py "text to check"
echo "text" | python lint-voice.py
python lint-voice.py < input.txt
Exit codes:
0 — clean (no ERRORs; WARNs alone do not fail)
1 — ERRORs found (agent must fix and re-run)
2 — usage error
"""
import re
import sys
# Rule table: one (level, id, pattern, message) tuple per rule.
# level: "ERROR" = must fix before delivery; "WARN" = check in context, may be OK
# id: short identifier printed next to each finding
# pattern: regular-expression source; flags are given inline, e.g. (?i) or (?m)
# message: advice printed for every match of the pattern
#
# Typographic characters in patterns are written as \uXXXX regex escapes so
# they cannot be silently normalised to ASCII (or dropped) by editors and
# copy/paste. An empty pattern would match at every position of the input.
CHECKS: list[tuple[str, str, str, str]] = [
    # ── Typography ────────────────────────────────────────────────────────────
    ("ERROR", "EM_DASH",
     r"\u2014",  # U+2014 EM DASH
     "Em-dash (—). Replace with colon, comma, or rephrase as separate sentence."),
    ("ERROR", "SMART_QUOTES",
     # U+201C/U+201D curly quotes, U+201E low quote, U+00AB/U+00BB guillemets.
     # The straight ASCII double quote is deliberately NOT in this class.
     r"[\u201c\u201d\u201e\u00ab\u00bb]",
     'Smart/typographic double quote. Use straight " or rephrase.'),
    ("WARN", "SMART_APOSTROPHE",
     # U+2018/U+2019 curly single quotes plus the backtick.
     # The straight ASCII apostrophe is deliberately NOT in this class.
     r"[\u2018\u2019`]",
     "Smart/curly apostrophe or backtick. Use straight apostrophe '."),
    # ── AI / corporate words ──────────────────────────────────────────────────
    ("ERROR", "AI_LEVERAGE",
     r"(?i)\bleverag(e|es|ed|ing)\b",
     '"leverage" — AI/corporate word. Replace with plain verb (use, apply, build on).'),
    ("ERROR", "AI_ROBUST",
     r"(?i)\brobust\b",
     '"robust" — AI/corporate word. Replace with specific description.'),
    ("ERROR", "AI_SEAMLESS",
     r"(?i)\bseamless(ly)?\b",
     '"seamless" — AI/corporate word. Cut or replace with plain description.'),
    ("ERROR", "AI_PASSIONATE",
     r"(?i)\bpassionat(e|ely)\b|\bmy passion\b",
     '"passionate/passion" — AI/corporate word. Replace with direct statement.'),
    ("ERROR", "AI_THRILLED",
     r"(?i)\bthrilled\b",
     '"thrilled" — AI word. Replace with direct statement.'),
    ("ERROR", "AI_DELVE",
     r"(?i)\bdelv(e|ing|ed)\b",
     '"delve" — AI word. Replace with explore / look into / dig.'),
    ("ERROR", "AI_TAPESTRY",
     r"(?i)\btapestry\b",
     '"tapestry" — AI word. Remove or replace.'),
    ("ERROR", "AI_EMPOWER",
     r"(?i)\bempower(s|ed|ing)?\b",
     '"empower" — AI/corporate word. Replace with plain verb.'),
    ("ERROR", "AI_STREAMLINE",
     r"(?i)\bstreamline[ds]?\b|\bstreamlining\b",
     '"streamline" — AI/corporate word. Replace with plain verb.'),
    ("ERROR", "AI_ACTIONABLE",
     r"(?i)\bactionable\b",
     '"actionable" — AI/corporate word. Replace with plain description.'),
    ("ERROR", "AI_UNLOCK",
     r"(?i)\bunlock(s|ed|ing)?\b",
     '"unlock" — AI/corporate word. Replace with plain verb.'),
    ("ERROR", "AI_CUTTING_EDGE",
     r"(?i)cutting[- ]edge",
     '"cutting-edge" — AI/corporate phrase. Replace with specific description.'),
    ("WARN", "AI_INNOVATIVE",
     r"(?i)\binnovativ(e|ely)\b|\binnovation\b",
     '"innovative/innovation" — often AI/corporate filler. Replace with specific if used generically.'),
    # ── Sentence openers (capitalised opener form) ────────────────────────────
    # Pattern: word at line start OR after terminal punctuation + whitespace
    ("ERROR", "OPENER_FURTHERMORE",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Furthermore[,\s]",
     '"Furthermore" opener. Cut — connection should come from content, not connective.'),
    ("ERROR", "OPENER_MOREOVER",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Moreover[,\s]",
     '"Moreover" opener. Cut — restructure.'),
    ("ERROR", "OPENER_ADDITIONALLY",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Additionally[,\s]",
     '"Additionally" opener. Cut — restructure without connective.'),
    ("ERROR", "OPENER_ULTIMATELY",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Ultimately[,\s]",
     '"Ultimately" opener. Cut — restructure.'),
    ("ERROR", "OPENER_HOWEVER",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)However[,\s]",
     '"However" opener. Cut — use contrast from content, not a gluing connective.'),
    ("ERROR", "OPENER_IMPORTANTLY",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Importantly[,\s]",
     '"Importantly" opener — filler frame. Cut.'),
    ("ERROR", "OPENER_THAT_SAID",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)That said[,\s]",
     '"That said" opener. Cut — restructure.'),
    ("ERROR", "OPENER_IN_CONCLUSION",
     r"(?i)in conclusion[,\s]",
     '"In conclusion" — empty closing summary. Cut entirely.'),
    ("ERROR", "OPENER_OVERALL",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)Overall[,\s]",
     '"Overall" opener — empty summary signal. Cut.'),
    ("WARN", "OPENER_SO_COMMA",
     r"(?m)(?:^[ \t]*|(?<=[.!?])[ \t]+)So[,\s]",
     '"So," opener. Oleg cuts these — connection should come from content.'),
    # ── Filler frames ─────────────────────────────────────────────────────────
    ("ERROR", "FILLER_WORTH_NOTING",
     r"(?i)it'?s worth noting",
     '"It\'s worth noting" — filler frame. Cut, say the thing directly.'),
    ("ERROR", "FILLER_IMPORTANT_REMEMBER",
     r"(?i)it'?s important to remember",
     '"It\'s important to remember" — filler frame. Cut.'),
    ("ERROR", "FILLER_I_AM_WRITING",
     r"(?i)\bI am writing to\b",
     '"I am writing to" — generic opener. Replace with direct lead.'),
    ("ERROR", "FILLER_IN_TODAYS",
     r"(?i)in today'?s\b",
     '"In today\'s..." — AI/generic opener. Cut.'),
    # ── AI structural patterns ────────────────────────────────────────────────
    ("ERROR", "PATTERN_ITS_NOT_JUST",
     r"(?i)it'?s not just",
     '"it\'s not just X, it\'s Y" construction. Rephrase as direct statement.'),
    ("WARN", "PATTERN_NOT_X_BUT",
     r"(?i)\bnot [\w]+(?:\s+[\w]+){0,3},?\s+but\b",
     '"not X, but Y" — often AI phrasing. Rephrase if rhetorical.'),
    ("ERROR", "PATTERN_HONEST_FRAMING",
     r"(?i)honest framing|one thing to flag|I want to be honest about",
     '"honest framing / one thing to flag" template. Not Oleg\'s pattern — remove.'),
    # ── AI closings ───────────────────────────────────────────────────────────
    ("ERROR", "CLOSING_LOOKING_FORWARD",
     r"(?i)looking forward to",
     '"Looking forward to" closing. Replace e.g. "Open to chat if it\'s a fit."'),
    ("ERROR", "CLOSING_HAPPY_TO",
     r"(?i)\bhappy to\b",
     '"happy to" — AI phrasing. Replace with plain statement.'),
    # ── Warn-only borderline cases ────────────────────────────────────────────
    ("WARN", "WARN_NAVIGATE",
     r"(?i)\bnavigat(e|ing|ed)\b",
     '"navigate" — check if metaphorical AI-speak ("navigate challenges"). OK if literal.'),
    ("WARN", "WARN_LANDSCAPE",
     r"(?i)\blandscape\b",
     '"landscape" — check if metaphorical AI-speak. OK if literal/technical.'),
]
def get_context(text: str, match: re.Match, window: int = 60) -> str:
    """Return a one-line excerpt of *text* surrounding *match*.

    The excerpt spans up to *window* characters before the start of the match
    and up to *window* characters after its end, clamped to the bounds of
    *text*. Newlines and runs of whitespace are collapsed to single spaces.

    Args:
        text: The full text that was searched.
        match: A match object obtained by searching *text*.
        window: Maximum number of characters of context on each side.

    Returns:
        The excerpt, with "..." prepended and/or appended on each side where
        the excerpt does not reach the corresponding end of *text*.
    """
    start = max(0, match.start() - window)
    end = min(len(text), match.end() + window)
    # Split/join collapses every whitespace run (including newlines) and also
    # trims leading/trailing whitespace from the excerpt.
    snippet = " ".join(text[start:end].split())
    # Mark truncation explicitly so a reader can tell an excerpt from the
    # complete text; both branches used to be empty strings.
    prefix = "..." if start > 0 else ""
    suffix = "..." if end < len(text) else ""
    return f"{prefix}{snippet}{suffix}"
def line_of(text: str, pos: int) -> int:
    """Return the 1-based line number of character offset *pos* in *text*.

    The line number is one more than the number of newline characters that
    occur before *pos*.
    """
    newlines_before = text.count("\n", 0, pos)
    return newlines_before + 1
def run_checks(text: str) -> list[dict]:
    """Apply every rule in CHECKS to *text* and collect the matches.

    Args:
        text: The text to lint.

    Returns:
        One dict per regex match, with the keys "level", "id", "line",
        "context" and "message". The list is ordered by line number and then
        by rule id.
    """
    hits = [
        {
            "level": severity,
            "id": rule_id,
            "line": line_of(text, found.start()),
            "context": get_context(text, found),
            "message": advice,
        }
        for severity, rule_id, regex, advice in CHECKS
        for found in re.finditer(regex, text)
    ]
    # sorted() is stable, so hits that share a line and id keep match order.
    return sorted(hits, key=lambda hit: (hit["line"], hit["id"]))
def main() -> None:
    """Lint text taken from the command line or stdin and exit with a status.

    Command-line arguments, when present, are joined with single spaces and
    used as the text. Otherwise the text is read from stdin, unless stdin is
    a terminal, in which case a usage message is printed to stderr.

    Exit status: 0 when the input is empty, clean, or has warnings only;
    1 when at least one ERROR finding exists; 2 on a usage error.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        text = " ".join(cli_args)
    elif sys.stdin.isatty():
        # No arguments and nothing piped in: there is no text to lint.
        print("Usage: python lint-voice.py \"text\" OR pipe text via stdin", file=sys.stderr)
        sys.exit(2)
    else:
        text = sys.stdin.read()

    if not text.strip():
        print("✓ CLEAN — empty input.", file=sys.stderr)
        sys.exit(0)

    findings = run_checks(text)
    if not findings:
        print("✓ CLEAN — no issues found.")
        sys.exit(0)

    error_count = sum(1 for item in findings if item["level"] == "ERROR")
    warn_count = sum(1 for item in findings if item["level"] == "WARN")
    print(f"Found {len(findings)} issue(s): {error_count} ERROR, {warn_count} WARN\n")

    for item in findings:
        severity = item["level"]
        verb = "Fix" if severity == "ERROR" else "Note"
        print(f"[{severity}] {item['id']} — line {item['line']}")
        print(f' Context: "{item["context"]}"')
        print(f" {verb}: {item['message']}")
        print()

    print("---")
    # Only ERROR findings fail the run; warnings alone still exit 0.
    if error_count == 0:
        print("EXIT 0 — no errors (warnings only). Review WARNs if applicable.")
        sys.exit(0)
    print(f"EXIT 1 — {error_count} error(s) present. Fix and re-run.")
    sys.exit(1)
# Run the linter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()