Skip to main content

colorful_cli/
lib.rs

1//! Colorize English prose by part of speech in the terminal.
2//!
3//! This is a driving adapter: it wires the [`ProseParser`] and
4//! [`ClosedClassLexicon`] together through a `LexicalAnnotator` and renders the
5//! classified token stream as ANSI-colored text. The same classification feeds
6//! the LSP server; here it lands as color in a terminal with no editor.
7
8#![forbid(unsafe_code)]
9#![warn(missing_docs)]
10
11use std::io::{self, Read, Write};
12
13use colorful_core::{Annotator, LexicalAnnotator, Parser, PosClass};
14use colorful_lexicon::ClosedClassLexicon;
15use colorful_parse::ProseParser;
16
17/// The ANSI SGR parameter used to color a class, or `None` to leave it plain.
18fn sgr(class: PosClass) -> Option<&'static str> {
19    match class {
20        PosClass::Function(_) => Some("1;35"), // bold magenta — the "keywords"
21        PosClass::ProperNoun => Some("1;33"),  // bold yellow
22        PosClass::Number => Some("36"),        // cyan
23        PosClass::Quote => Some("32"),         // green
24        PosClass::Punctuation => Some("90"),   // bright black
25        PosClass::Content => None,             // default foreground
26    }
27}
28
29/// Render `source` with ANSI color per part of speech.
30///
31/// When `color` is `false`, `source` is returned unchanged (a faithful
32/// passthrough), so piping through the tool never alters the text.
33#[must_use]
34pub fn colorize(source: &str, color: bool) -> String {
35    if !color {
36        return source.to_string();
37    }
38
39    let tree = ProseParser::new().parse(source);
40    let tokens = LexicalAnnotator::new(ClosedClassLexicon::new()).annotate(source, &tree);
41
42    let mut out = String::with_capacity(source.len() + tokens.len() * 8);
43    let mut prev = 0;
44    for token in tokens {
45        // Emit the gap (whitespace and anything between tokens) verbatim.
46        if token.span.start > prev {
47            out.push_str(source.get(prev..token.span.start).unwrap_or(""));
48        }
49        let text = token.span.slice(source);
50        if let Some(code) = sgr(token.class) {
51            out.push_str("\x1b[");
52            out.push_str(code);
53            out.push('m');
54            out.push_str(text);
55            out.push_str("\x1b[0m");
56        } else {
57            out.push_str(text);
58        }
59        prev = token.span.end;
60    }
61    if prev < source.len() {
62        out.push_str(source.get(prev..).unwrap_or(""));
63    }
64    out
65}
66
/// Decide whether color should be emitted.
///
/// `no_color_flag` reflects the `--no-color` switch and `no_color_env` the
/// `NO_COLOR` convention (<https://no-color.org/>). Color stays on only when
/// neither of the two opt-outs is active.
#[must_use]
pub fn decide_color(no_color_flag: bool, no_color_env: bool) -> bool {
    let opted_out = no_color_flag || no_color_env;
    !opted_out
}
73
74const HELP: &str = "\
75colorful — color English prose by part of speech
76
77USAGE:
78    colorful [OPTIONS] [FILE]
79
80ARGS:
81    FILE          Path to read; omit or use \"-\" to read standard input.
82
83OPTIONS:
84    --no-color    Pass the text through without ANSI color.
85    -h, --help    Show this help.
86
87Color is disabled automatically when the NO_COLOR environment variable is set.
88";
89
90/// Run the CLI over `args` (the program's arguments, excluding `argv[0]`).
91///
92/// # Errors
93///
94/// Returns an error if the input file cannot be read, standard input cannot be
95/// read, or an unknown flag is supplied.
96pub fn run<I>(args: I) -> io::Result<()>
97where
98    I: IntoIterator<Item = String>,
99{
100    let mut no_color_flag = false;
101    let mut path: Option<String> = None;
102    let mut end_of_options = false;
103
104    for arg in args {
105        if end_of_options {
106            path = Some(arg);
107            continue;
108        }
109        match arg.as_str() {
110            "--" => end_of_options = true,
111            "--no-color" => no_color_flag = true,
112            "-h" | "--help" => {
113                print!("{HELP}");
114                return Ok(());
115            }
116            "-" => path = None,
117            other if other.starts_with('-') && other.len() > 1 => {
118                return Err(io::Error::new(
119                    io::ErrorKind::InvalidInput,
120                    format!("unknown option: {other}"),
121                ));
122            }
123            other => path = Some(other.to_string()),
124        }
125    }
126
127    let input = match path {
128        Some(p) => std::fs::read_to_string(p)?,
129        None => {
130            let mut buf = String::new();
131            io::stdin().read_to_string(&mut buf)?;
132            buf
133        }
134    };
135
136    let color = decide_color(no_color_flag, std::env::var_os("NO_COLOR").is_some());
137    let mut stdout = io::stdout().lock();
138    stdout.write_all(colorize(&input, color).as_bytes())?;
139    stdout.flush()
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// With color off, the input comes back byte-for-byte.
    #[test]
    fn passthrough_when_color_disabled() {
        let input = "The cat is 3.\nA second line.";
        let output = colorize(input, false);
        assert_eq!(output, input);
    }

    /// Pins the exact escape sequences for a simple sentence.
    #[test]
    fn golden_colored_output() {
        // Expected classes: "The" and "is" are function words, "cat" is
        // content, "3" is a number and "." is punctuation. Whitespace must
        // come through verbatim.
        let expected =
            "\x1b[1;35mThe\x1b[0m cat \x1b[1;35mis\x1b[0m \x1b[36m3\x1b[0m\x1b[90m.\x1b[0m";
        assert_eq!(colorize("The cat is 3.", true), expected);
    }

    /// A capitalized word in mid-sentence is rendered as a proper noun.
    #[test]
    fn golden_proper_noun_output() {
        // "Paris" should come out bold yellow.
        let expected = "\x1b[1;35mI\x1b[0m visited \x1b[1;33mParis\x1b[0m\x1b[90m.\x1b[0m";
        assert_eq!(colorize("I visited Paris.", true), expected);
    }

    /// Removing every escape sequence must give back the original source.
    #[test]
    fn gaps_and_newlines_are_preserved_exactly() {
        let original = "Well,  \t\"quoted\"\n  text—here.";
        let round_tripped = strip_ansi(&colorize(original, true));
        assert_eq!(round_tripped, original);
    }

    /// `--` switches dash-prefixed arguments from options to paths.
    #[test]
    fn double_dash_allows_dash_prefixed_paths() {
        // After `--` the argument is a path, so the failure is a missing
        // file (NotFound) rather than an "unknown option" (InvalidInput).
        let after_separator = vec![String::from("--"), String::from("-weird.txt")];
        let kind = run(after_separator).unwrap_err().kind();
        assert_eq!(kind, io::ErrorKind::NotFound);

        // Without `--` the very same argument is an unknown option.
        let bare = vec![String::from("-weird.txt")];
        let kind = run(bare).unwrap_err().kind();
        assert_eq!(kind, io::ErrorKind::InvalidInput);
    }

    /// Color is on only when neither the flag nor the environment opts out.
    #[test]
    fn decide_color_honors_flag_and_env() {
        let table = [
            (false, false, true),
            (true, false, false),
            (false, true, false),
            (true, true, false),
        ];
        for (flag, env, expected) in table {
            assert_eq!(decide_color(flag, env), expected);
        }
    }

    /// Drop ANSI SGR sequences (`ESC [ ... m`) so output can be compared
    /// against the plain source.
    fn strip_ansi(s: &str) -> String {
        let mut plain = String::with_capacity(s.len());
        let mut rest = s.chars();
        loop {
            match rest.next() {
                None => break plain,
                Some('\x1b') => {
                    // Skip everything up to and including the closing 'm'.
                    let _ = rest.find(|&ch| ch == 'm');
                }
                Some(ch) => plain.push(ch),
            }
        }
    }
}