Skip to main content

colorful_cli/
lib.rs

1//! Colorize English prose by part of speech in the terminal.
2//!
3//! This is a driving adapter: it wires the [`ProseParser`] and
4//! [`ClosedClassLexicon`] together through a `LexicalAnnotator` and renders the
5//! classified token stream as ANSI-colored text. The same classification feeds
6//! the LSP server; here it lands as color in a terminal with no editor.
7
8#![forbid(unsafe_code)]
9#![warn(missing_docs)]
10
11use std::io::{self, Read, Write};
12use std::process::ExitCode;
13
14use colorful_core::{Analyzer, Annotator, Finding, LexicalAnnotator, Parser, PosClass, Severity};
15use colorful_lexicon::ClosedClassLexicon;
16use colorful_lint::ProseLinter;
17use colorful_parse::ProseParser;
18
19/// The ANSI SGR parameter used to color a class, or `None` to leave it plain.
20///
21/// The colors are not chosen here: the class maps to an abstract `VisualRole`,
22/// which the `colorful.vocabulary/v1` manifest projects onto ANSI. The same
23/// manifest drives the LSP and the graft consumer, so all three surfaces agree.
24fn sgr(class: PosClass) -> Option<&'static str> {
25    let role = colorful_ir::vocabulary::visual_role_for(class);
26    colorful_ir::vocabulary::projection(&role).ansi.as_deref()
27}
28
29/// Render `source` with ANSI color per part of speech.
30///
31/// When `color` is `false`, `source` is returned unchanged (a faithful
32/// passthrough), so piping through the tool never alters the text.
33#[must_use]
34pub fn colorize(source: &str, color: bool) -> String {
35    if !color {
36        return source.to_string();
37    }
38
39    let tree = ProseParser::new().parse(source);
40    let tokens = LexicalAnnotator::new(ClosedClassLexicon::new()).annotate(source, &tree);
41
42    let mut out = String::with_capacity(source.len() + tokens.len() * 8);
43    let mut prev = 0;
44    for token in tokens {
45        // Emit the gap (whitespace and anything between tokens) verbatim.
46        if token.span.start > prev {
47            out.push_str(source.get(prev..token.span.start).unwrap_or(""));
48        }
49        let text = token.span.slice(source);
50        if let Some(code) = sgr(token.class) {
51            out.push_str("\x1b[");
52            out.push_str(code);
53            out.push('m');
54            out.push_str(text);
55            out.push_str("\x1b[0m");
56        } else {
57            out.push_str(text);
58        }
59        prev = token.span.end;
60    }
61    if prev < source.len() {
62        out.push_str(source.get(prev..).unwrap_or(""));
63    }
64    out
65}
66
/// Report whether ANSI color should be emitted.
///
/// Color is the default. It is switched off when the `--no-color` flag was
/// given (`no_color_flag`) or when the `NO_COLOR` convention
/// (<https://no-color.org/>) applies (`no_color_env`).
#[must_use]
pub fn decide_color(no_color_flag: bool, no_color_env: bool) -> bool {
    let disabled = no_color_flag || no_color_env;
    !disabled
}
73
/// Usage text printed by the default (colorize) command for `-h` / `--help`.
///
/// The literal opens with a backslash-newline so the output starts directly
/// with the banner line instead of a blank line.
///
/// NOTE(review): `run` also dispatches an explicit `color` subcommand, and the
/// argument parsers in this file accept `--` to end option parsing. Neither is
/// listed below — confirm whether that omission is intentional.
const HELP: &str = "\
colorful — color English prose by part of speech

USAGE:
    colorful [OPTIONS] [FILE]
    colorful lint [FILE]
    colorful ir [FILE]

ARGS:
    FILE          Path to read; omit or use \"-\" to read standard input.

OPTIONS:
    --no-color    Pass the text through without ANSI color.
    -h, --help    Show this help.

SUBCOMMANDS:
    lint          Report prose problems (weak words, run-ons, passives); exits
                  non-zero when any are found.
    ir            Emit the colorful.syntax/v1 IR as canonical JSON.

Color is disabled automatically when the NO_COLOR environment variable is set.
";
96
97/// Run the CLI over `args` (the program's arguments, excluding `argv[0]`).
98///
99/// Returns the process [`ExitCode`]: `lint` exits non-zero when it reports
100/// findings; every other path exits zero on success.
101///
102/// # Errors
103///
104/// Returns an error if the input file cannot be read, standard input cannot be
105/// read, or an unknown flag is supplied.
106pub fn run<I>(args: I) -> io::Result<ExitCode>
107where
108    I: IntoIterator<Item = String>,
109{
110    let args: Vec<String> = args.into_iter().collect();
111    match args.first().map(String::as_str) {
112        Some("ir") => run_ir(args.iter().skip(1).cloned()).map(|()| ExitCode::SUCCESS),
113        Some("lint") => run_lint(args.iter().skip(1).cloned()),
114        Some("color") => run_color(args.iter().skip(1).cloned()).map(|()| ExitCode::SUCCESS),
115        _ => run_color(args).map(|()| ExitCode::SUCCESS),
116    }
117}
118
119/// Colorize prose to ANSI in the terminal (the default subcommand).
120fn run_color<I>(args: I) -> io::Result<()>
121where
122    I: IntoIterator<Item = String>,
123{
124    let mut no_color_flag = false;
125    let mut path: Option<String> = None;
126    let mut end_of_options = false;
127
128    for arg in args {
129        if end_of_options {
130            path = Some(arg);
131            continue;
132        }
133        match arg.as_str() {
134            "--" => end_of_options = true,
135            "--no-color" => no_color_flag = true,
136            "-h" | "--help" => {
137                print!("{HELP}");
138                return Ok(());
139            }
140            "-" => path = None,
141            other if other.starts_with('-') && other.len() > 1 => {
142                return Err(io::Error::new(
143                    io::ErrorKind::InvalidInput,
144                    format!("unknown option: {other}"),
145                ));
146            }
147            other => path = Some(other.to_string()),
148        }
149    }
150
151    let input = match path {
152        Some(p) => std::fs::read_to_string(p)?,
153        None => {
154            let mut buf = String::new();
155            io::stdin().read_to_string(&mut buf)?;
156            buf
157        }
158    };
159
160    let color = decide_color(no_color_flag, std::env::var_os("NO_COLOR").is_some());
161    let mut stdout = io::stdout().lock();
162    stdout.write_all(colorize(&input, color).as_bytes())?;
163    stdout.flush()
164}
165
166/// Emit the `colorful.syntax/v1` IR (`DocumentAnalysis`) as canonical JSON.
167///
168/// `colorful ir [FILE]` — reads the file (or stdin), parses and classifies it,
169/// and prints the IR a back-end (graft, jedit, an editor) can consume.
170fn run_ir<I>(args: I) -> io::Result<()>
171where
172    I: IntoIterator<Item = String>,
173{
174    let mut path: Option<String> = None;
175    let mut end_of_options = false;
176    for arg in args {
177        if end_of_options {
178            path = Some(arg);
179            continue;
180        }
181        match arg.as_str() {
182            "--" => end_of_options = true,
183            "-h" | "--help" => {
184                print!("colorful ir [FILE]\n\nEmit the colorful.syntax/v1 IR as canonical JSON (stdin if no FILE).\n");
185                return Ok(());
186            }
187            "-" => path = None,
188            other if other.starts_with('-') && other.len() > 1 => {
189                return Err(io::Error::new(
190                    io::ErrorKind::InvalidInput,
191                    format!("unknown option: {other}"),
192                ));
193            }
194            other => path = Some(other.to_string()),
195        }
196    }
197
198    let (unit_id, input) = match path {
199        Some(p) => {
200            let contents = std::fs::read_to_string(&p)?;
201            (p, contents)
202        }
203        None => {
204            let mut buf = String::new();
205            io::stdin().read_to_string(&mut buf)?;
206            ("stdin".to_string(), buf)
207        }
208    };
209
210    let tree = ProseParser::new().parse(&input);
211    let tokens = LexicalAnnotator::new(ClosedClassLexicon::new()).annotate(&input, &tree);
212    let document = colorful_ir::from_classification(&unit_id, &input, &tree, &tokens)
213        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
214    let json = colorful_ir::canonical_json(&document)
215        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
216
217    let mut stdout = io::stdout().lock();
218    stdout.write_all(json.as_bytes())?;
219    stdout.write_all(b"\n")?;
220    stdout.flush()
221}
222
223/// Report prose problems for a file (the `lint` subcommand).
224///
225/// `colorful lint [FILE]` — reads the file (or stdin), runs the
226/// [`ProseLinter`], and prints one compiler-style line per finding. Exits
227/// non-zero when any findings are reported, so it fails a CI gate on bad prose.
228fn run_lint<I>(args: I) -> io::Result<ExitCode>
229where
230    I: IntoIterator<Item = String>,
231{
232    let mut path: Option<String> = None;
233    let mut end_of_options = false;
234    for arg in args {
235        if end_of_options {
236            path = Some(arg);
237            continue;
238        }
239        match arg.as_str() {
240            "--" => end_of_options = true,
241            "-h" | "--help" => {
242                print!("colorful lint [FILE]\n\nReport prose problems (stdin if no FILE). Exits non-zero when any are found.\n");
243                return Ok(ExitCode::SUCCESS);
244            }
245            "-" => path = None,
246            other if other.starts_with('-') && other.len() > 1 => {
247                return Err(io::Error::new(
248                    io::ErrorKind::InvalidInput,
249                    format!("unknown option: {other}"),
250                ));
251            }
252            other => path = Some(other.to_string()),
253        }
254    }
255
256    let (name, input) = match path {
257        Some(p) => {
258            let contents = std::fs::read_to_string(&p)?;
259            (p, contents)
260        }
261        None => {
262            let mut buf = String::new();
263            io::stdin().read_to_string(&mut buf)?;
264            ("<stdin>".to_string(), buf)
265        }
266    };
267
268    let mut stdout = io::stdout().lock();
269    let found = lint_to_writer(&name, &input, &mut stdout)?;
270    stdout.flush()?;
271    Ok(if found {
272        ExitCode::FAILURE
273    } else {
274        ExitCode::SUCCESS
275    })
276}
277
278/// Lint `source` and write the report to `out`, one finding per line. Returns
279/// whether any findings were reported, which the caller turns into the exit
280/// code. Factored out of [`run_lint`] so the format and the exit decision are
281/// testable without touching the filesystem.
282fn lint_to_writer<W: Write>(name: &str, source: &str, out: &mut W) -> io::Result<bool> {
283    let tree = ProseParser::new().parse(source);
284    let tokens = LexicalAnnotator::new(ClosedClassLexicon::new()).annotate(source, &tree);
285    let findings = ProseLinter::new().analyze(source, &tree, &tokens);
286    out.write_all(lint_report(name, source, &findings).as_bytes())?;
287    Ok(!findings.is_empty())
288}
289
290/// Render `findings` as compiler-style diagnostic lines:
291/// `name:line:col: severity [code]: message`. Returns `""` for no findings, so
292/// clean input prints nothing.
293#[must_use]
294pub fn lint_report(name: &str, source: &str, findings: &[Finding]) -> String {
295    let mut out = String::new();
296    for finding in findings {
297        let (line, col) = line_col(source, finding.span.start);
298        let severity = match finding.severity {
299            Severity::Warning => "warning",
300            Severity::Info => "info",
301        };
302        out.push_str(&format!(
303            "{name}:{line}:{col}: {severity} [{code}]: {message}\n",
304            code = finding.rule.code(),
305            message = finding.message,
306        ));
307    }
308    out
309}
310
/// Convert byte offset `byte` in `source` to a 1-based `(line, column)` pair.
///
/// Columns count characters, not bytes, and a new line starts after every
/// `\n`. Only characters that begin before `byte` are counted, so an offset at
/// or past the end of `source` reports the position just after the last
/// character.
fn line_col(source: &str, byte: usize) -> (usize, usize) {
    source
        .char_indices()
        .take_while(|&(offset, _)| offset < byte)
        .fold((1usize, 1usize), |(line, col), (_, ch)| {
            if ch == '\n' {
                (line + 1, 1)
            } else {
                (line, col + 1)
            }
        })
}
329
#[cfg(test)]
mod tests {
    use super::*;

    /// Drop ANSI SGR sequences (`ESC [ ... m`) so colored output can be
    /// compared with its source.
    fn strip_ansi(s: &str) -> String {
        let mut plain = String::with_capacity(s.len());
        let mut in_escape = false;
        for c in s.chars() {
            if in_escape {
                // Swallow everything up to and including the terminating 'm'.
                in_escape = c != 'm';
            } else if c == '\x1b' {
                in_escape = true;
            } else {
                plain.push(c);
            }
        }
        plain
    }

    #[test]
    fn passthrough_when_color_disabled() {
        let source = "The cat is 3.\nA second line.";
        assert_eq!(colorize(source, false), source);
    }

    #[test]
    fn golden_colored_output() {
        // One piece per token or gap: "The" (function), cat (content),
        // "is" (function), 3 (number), "." (punctuation). Whitespace is kept
        // verbatim.
        let expected = concat!(
            "\x1b[1;35mThe\x1b[0m",
            " cat ",
            "\x1b[1;35mis\x1b[0m",
            " ",
            "\x1b[36m3\x1b[0m",
            "\x1b[90m.\x1b[0m",
        );
        assert_eq!(colorize("The cat is 3.", true), expected);
    }

    #[test]
    fn golden_proper_noun_output() {
        // A capitalized "Paris" in mid-sentence is a (bold yellow) proper noun.
        let expected = concat!(
            "\x1b[1;35mI\x1b[0m",
            " visited ",
            "\x1b[1;33mParis\x1b[0m",
            "\x1b[90m.\x1b[0m",
        );
        assert_eq!(colorize("I visited Paris.", true), expected);
    }

    #[test]
    fn gaps_and_newlines_are_preserved_exactly() {
        // With every ANSI escape removed, the output must equal the input.
        let source = "Well,  \t\"quoted\"\n  text—here.";
        let round_trip = strip_ansi(&colorize(source, true));
        assert_eq!(round_trip, source);
    }

    #[test]
    fn double_dash_allows_dash_prefixed_paths() {
        // Behind `--` a dash-prefixed argument is a path, so the failure is a
        // missing file (NotFound) rather than an "unknown option".
        let after_separator = run(["--".to_string(), "-weird.txt".to_string()]).unwrap_err();
        assert_eq!(after_separator.kind(), io::ErrorKind::NotFound);

        // The same argument on its own is rejected as an option.
        let bare = run(["-weird.txt".to_string()]).unwrap_err();
        assert_eq!(bare.kind(), io::ErrorKind::InvalidInput);
    }

    #[test]
    fn lint_reports_findings_in_compiler_style_and_signals_failure() {
        // "just" is a weak word at column 9. The line carries file, position,
        // severity, rule code and message, and the writer signals a failure.
        let mut sink = Vec::new();
        let found = lint_to_writer("draft.txt", "This is just wrong.", &mut sink).unwrap();
        assert!(found, "findings should signal a non-zero exit");

        let report = String::from_utf8(sink).unwrap();
        assert_eq!(
            report,
            "draft.txt:1:9: info [weak-word]: weak word 'just'\n"
        );
    }

    #[test]
    fn lint_of_clean_prose_prints_nothing_and_signals_success() {
        let mut sink = Vec::new();
        let found = lint_to_writer("clean.txt", "The cat sat on the mat.", &mut sink).unwrap();
        assert!(!found, "clean prose should signal a zero exit");
        assert!(sink.is_empty(), "clean prose should print nothing");
    }

    #[test]
    fn lint_line_col_tracks_newlines() {
        // Offsets 0, 12 and 25 are the first byte of lines one, two and three.
        let source = "First line.\nSecond line.\nthird";
        for (offset, expected) in [(0, (1, 1)), (12, (2, 1)), (25, (3, 1))] {
            assert_eq!(line_col(source, offset), expected);
        }
    }

    #[test]
    fn lint_unknown_option_is_rejected() {
        let rejected = run(["lint".to_string(), "--bogus".to_string()]).unwrap_err();
        assert_eq!(rejected.kind(), io::ErrorKind::InvalidInput);
    }

    #[test]
    fn decide_color_honors_flag_and_env() {
        // (flag, env, expected): color is on only when neither asks for it off.
        let cases = [
            (false, false, true),
            (true, false, false),
            (false, true, false),
            (true, true, false),
        ];
        for (flag, env, expected) in cases {
            assert_eq!(decide_color(flag, env), expected);
        }
    }
}