Skip to main content

hjkl_buffer/
listchars.rs

//! Invisible-character rendering configuration.
//!
//! [`ListChars`] holds the glyph substitutions used when
//! `:set list` is active. Mirrors vim's `listchars` option.
5
/// Invisibles rendering configuration. Matches vim's `:set listchars`.
///
/// When `:set list` is on, the render layer substitutes whitespace characters
/// with the glyphs configured here. For the `Option` fields, `None` means
/// "no substitution / not rendered".
///
/// The [`Default`] value is `tab:^I,eol:$`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListChars {
    /// Leading char of a tab expansion (required). E.g. `>` in `tab:>-`.
    pub tab_lead: char,
    /// Fill char repeated up to the next tabstop. `None` = single-glyph tab:
    /// only `tab_lead` is drawn and the rest of the tab is padded with spaces.
    pub tab_fill: Option<char>,
    /// Substitution for regular spaces. `None` = no substitution.
    pub space: Option<char>,
    /// Substitution for trailing spaces. `None` = fall back to `space`, or
    /// leave the spaces as they are when `space` is unset too.
    pub trail: Option<char>,
    /// Marker appended after the last char on each line. `None` = no marker.
    pub eol: Option<char>,
    /// Substitution for non-breaking spaces (`\u{00a0}`). `None` = no substitution.
    pub nbsp: Option<char>,
    /// Char shown at the right edge when a line extends beyond the viewport
    /// (no-wrap mode). `None` = no marker.
    /// TODO: deferred — requires viewport edge integration.
    pub extends: Option<char>,
    /// Char shown at the left edge when the viewport is scrolled right past
    /// the line start. `None` = no marker.
    /// TODO: deferred — requires viewport edge integration.
    pub precedes: Option<char>,
}
36
37impl Default for ListChars {
38    fn default() -> Self {
39        // vim built-in default: tab:^I,eol:$
40        Self {
41            tab_lead: '^',
42            tab_fill: Some('I'),
43            space: None,
44            trail: None,
45            eol: Some('$'),
46            nbsp: None,
47            extends: None,
48            precedes: None,
49        }
50    }
51}
52
53impl ListChars {
54    /// Parse a vim-style `listchars` value string.
55    ///
56    /// Accepts comma-separated `key:value` pairs where value is one or two
57    /// chars (UTF-8). `tab` is the only key that may have two chars
58    /// (`tab:lead_fill`); all others take exactly one char.
59    ///
60    /// Returns `Err(String)` with a diagnostic on unknown keys or bad values.
61    pub fn parse(s: &str) -> Result<Self, String> {
62        // Start from a blank slate (all None). The `tab` key is required
63        // for the resulting value to be valid; if the caller omits it the
64        // existing tab_lead/tab_fill remain at the blank-slate defaults
65        // (`^` + `I`) which matches vim's initial default.
66        let mut lc = Self {
67            tab_lead: '^',
68            tab_fill: Some('I'),
69            space: None,
70            trail: None,
71            eol: None,
72            nbsp: None,
73            extends: None,
74            precedes: None,
75        };
76        for raw_part in s.split(',') {
77            // Only trim leading whitespace (not trailing — a trailing space
78            // is a valid single-char value, e.g. `tab:→ ` where space is
79            // the fill char).
80            let part = raw_part.trim_start();
81            if part.is_empty() {
82                continue;
83            }
84            let (key, val) = part
85                .split_once(':')
86                .ok_or_else(|| format!("listchars: missing `:` in `{part}`"))?;
87            let chars: Vec<char> = val.chars().collect();
88            match key {
89                "tab" => match chars.len() {
90                    1 => {
91                        lc.tab_lead = chars[0];
92                        lc.tab_fill = None;
93                    }
94                    2 => {
95                        lc.tab_lead = chars[0];
96                        lc.tab_fill = Some(chars[1]);
97                    }
98                    n => {
99                        return Err(format!(
100                            "listchars: `tab` value must be 1 or 2 chars, got {n}"
101                        ));
102                    }
103                },
104                "space" => lc.space = Some(one_char(key, &chars)?),
105                "trail" => lc.trail = Some(one_char(key, &chars)?),
106                "eol" => lc.eol = Some(one_char(key, &chars)?),
107                "nbsp" => lc.nbsp = Some(one_char(key, &chars)?),
108                "extends" => lc.extends = Some(one_char(key, &chars)?),
109                "precedes" => lc.precedes = Some(one_char(key, &chars)?),
110                other => {
111                    return Err(format!("listchars: unknown key `{other}`"));
112                }
113            }
114        }
115        Ok(lc)
116    }
117
118    /// Canonical string form for `:set listchars?`.
119    ///
120    /// Emits only the fields that are set (non-None), always in the order:
121    /// `tab`, `space`, `trail`, `eol`, `nbsp`, `extends`, `precedes`.
122    pub fn to_canonical_string(&self) -> String {
123        let mut parts: Vec<String> = Vec::new();
124        // tab is always present
125        if let Some(fill) = self.tab_fill {
126            parts.push(format!("tab:{}{}", self.tab_lead, fill));
127        } else {
128            parts.push(format!("tab:{}", self.tab_lead));
129        }
130        if let Some(ch) = self.space {
131            parts.push(format!("space:{ch}"));
132        }
133        if let Some(ch) = self.trail {
134            parts.push(format!("trail:{ch}"));
135        }
136        if let Some(ch) = self.eol {
137            parts.push(format!("eol:{ch}"));
138        }
139        if let Some(ch) = self.nbsp {
140            parts.push(format!("nbsp:{ch}"));
141        }
142        if let Some(ch) = self.extends {
143            parts.push(format!("extends:{ch}"));
144        }
145        if let Some(ch) = self.precedes {
146            parts.push(format!("precedes:{ch}"));
147        }
148        parts.join(",")
149    }
150}
151
/// Return the sole char of `chars`.
///
/// `key` is only used to name the offending `listchars` entry in the error
/// message.
///
/// # Errors
///
/// Returns a diagnostic `String` when `chars` is empty or holds more than one
/// char.
fn one_char(key: &str, chars: &[char]) -> Result<char, String> {
    match chars {
        &[only] => Ok(only),
        _ => {
            let n = chars.len();
            Err(format!(
                "listchars: `{key}` value must be exactly 1 char, got {n}"
            ))
        }
    }
}
161
162/// Apply listchars substitutions to a line string.
163///
164/// When `list` is false, returns `Cow::Borrowed(line)` with no allocation.
165/// When `list` is true, walks the line and substitutes:
166/// - `\t` → `tab_lead` + `tab_fill` × (tabstop - col % tabstop - 1)
167/// - trailing spaces → `trail` glyph (if `Some`), else `space` glyph (if `Some`)
168/// - end-of-line → `eol` glyph (if `Some`) appended after all chars
169/// - `\u{00a0}` → `nbsp` glyph (if `Some`)
170/// - regular spaces → `space` glyph (if `Some`)
171///
172/// Note: extends/precedes (viewport edge markers) are deferred — handled by
173/// the renderer at the cell-paint level, not pre-processed here.
174pub fn apply_listchars<'a>(
175    line: &'a str,
176    lc: &ListChars,
177    list: bool,
178    tabstop: usize,
179) -> std::borrow::Cow<'a, str> {
180    if !list {
181        return std::borrow::Cow::Borrowed(line);
182    }
183
184    // Find the index of the first trailing whitespace char.
185    // "trailing whitespace" = spaces/tabs at the end of the line that would
186    // be rendered with `trail` glyph.
187    let trimmed_end = line.trim_end_matches([' ', '\t']).len();
188
189    let mut out = String::with_capacity(line.len() + 8);
190    let mut col: usize = 0; // visible column counter (for tab expansion)
191
192    for (byte_idx, ch) in line.char_indices() {
193        let is_trailing = byte_idx >= trimmed_end;
194        match ch {
195            '\t' => {
196                let spaces = tabstop - (col % tabstop);
197                // tab_lead is always the first cell
198                out.push(lc.tab_lead);
199                col += 1;
200                // fill remaining cells
201                let fill_count = spaces.saturating_sub(1);
202                if let Some(fill) = lc.tab_fill {
203                    for _ in 0..fill_count {
204                        out.push(fill);
205                        col += 1;
206                    }
207                } else {
208                    // single-glyph tab: pad with spaces to honour tabstop
209                    for _ in 0..fill_count {
210                        out.push(' ');
211                        col += 1;
212                    }
213                }
214            }
215            ' ' => {
216                let sub = if is_trailing {
217                    lc.trail.or(lc.space).unwrap_or(' ')
218                } else {
219                    lc.space.unwrap_or(' ')
220                };
221                out.push(sub);
222                col += 1;
223            }
224            '\u{00a0}' => {
225                out.push(lc.nbsp.unwrap_or('\u{00a0}'));
226                col += 1;
227            }
228            other => {
229                out.push(other);
230                col += unicode_width(other);
231            }
232        }
233    }
234
235    // Append eol marker
236    if let Some(eol) = lc.eol {
237        out.push(eol);
238    }
239
240    std::borrow::Cow::Owned(out)
241}
242
243/// Unicode display width for a char (1 for most, 2 for CJK wide chars, 0 for controls).
244#[inline]
245fn unicode_width(ch: char) -> usize {
246    // Use a simple approximation: CJK wide = 2, everything else = 1.
247    // This avoids adding unicode-width as a direct dep here; buffer-tui
248    // uses the real UnicodeWidthChar for rendering.
249    if is_wide(ch) { 2 } else { 1 }
250}
251
/// Very small is_wide predicate covering the most common CJK blocks.
///
/// Returns `true` when `ch` falls in one of the hard-coded ranges below and
/// `false` otherwise. The table is an approximation, not a full East Asian
/// Width lookup.
#[inline]
fn is_wide(ch: char) -> bool {
    matches!(ch,
        '\u{1100}'..='\u{115F}'   // Hangul Jamo
        | '\u{2E80}'..='\u{303E}' // CJK Radicals
        // One contiguous span: the previous split at U+33BF / U+33FF left
        // U+33C0..=U+33FE (part of the CJK Compatibility block) uncovered.
        | '\u{3041}'..='\u{A4CF}' // Hiragana/Katakana/CJK Compatibility/CJK Unified/Yi
        | '\u{A960}'..='\u{A97F}' // Hangul extension
        | '\u{AC00}'..='\u{D7FF}' // Hangul Syllables
        | '\u{F900}'..='\u{FAFF}' // CJK Compatibility
        | '\u{FE10}'..='\u{FE1F}' // Vertical forms
        | '\u{FE30}'..='\u{FE6F}' // CJK Compatibility forms
        | '\u{FF00}'..='\u{FF60}' // Fullwidth
        | '\u{FFE0}'..='\u{FFE6}' // Fullwidth signs
        | '\u{1B000}'..='\u{1B0FF}' // Kana Supplement
        | '\u{1F004}'              // Mahjong tile
        | '\u{1F0CF}'              // Playing card
        | '\u{1F200}'..='\u{1F2FF}' // Enclosed CJK
        | '\u{20000}'..='\u{2A6DF}' // CJK Unified Ext B
        | '\u{2A700}'..='\u{2CEAF}' // CJK Unified Ext C/D/E
        | '\u{2CEB0}'..='\u{2EBEF}' // CJK Unified Ext F
        | '\u{30000}'..='\u{3134F}' // CJK Unified Ext G
    )
}
277
#[cfg(test)]
mod tests {
    use super::*;
    use std::borrow::Cow;

    // ---- ListChars::parse tests ----

    #[test]
    fn listchars_parse_basic() {
        let lc = ListChars::parse("tab:>-,eol:$").unwrap();
        assert_eq!(lc.tab_lead, '>');
        assert_eq!(lc.tab_fill, Some('-'));
        assert_eq!(lc.eol, Some('$'));
        assert_eq!(lc.space, None);
        assert_eq!(lc.trail, None);
    }

    #[test]
    fn listchars_parse_all_keys() {
        let lc =
            ListChars::parse("tab:>-,space:·,trail:~,eol:¶,nbsp:_,extends:>,precedes:<").unwrap();
        assert_eq!(lc.tab_lead, '>');
        assert_eq!(lc.tab_fill, Some('-'));
        assert_eq!(lc.space, Some('·'));
        assert_eq!(lc.trail, Some('~'));
        assert_eq!(lc.eol, Some('¶'));
        assert_eq!(lc.nbsp, Some('_'));
        assert_eq!(lc.extends, Some('>'));
        assert_eq!(lc.precedes, Some('<'));
    }

    #[test]
    fn listchars_parse_utf8() {
        let lc = ListChars::parse("tab:→ ,eol:¬").unwrap();
        assert_eq!(lc.tab_lead, '→');
        assert_eq!(lc.tab_fill, Some(' '));
        assert_eq!(lc.eol, Some('¬'));
    }

    #[test]
    fn listchars_parse_single_char_tab() {
        // One char after `tab:` means "lead only, no fill glyph".
        let lc = ListChars::parse("tab:>").unwrap();
        assert_eq!(lc.tab_lead, '>');
        assert_eq!(lc.tab_fill, None);
    }

    #[test]
    fn listchars_parse_empty_keeps_tab_fallback() {
        // No keys at all: tab falls back to `^I`, everything else is unset.
        let lc = ListChars::parse("").unwrap();
        assert_eq!(lc.tab_lead, '^');
        assert_eq!(lc.tab_fill, Some('I'));
        assert_eq!(lc.eol, None);
        assert_eq!(lc.space, None);
    }

    #[test]
    fn listchars_parse_invalid_no_colon() {
        assert!(ListChars::parse("tab").is_err());
    }

    #[test]
    fn listchars_parse_invalid_three_char_tab() {
        assert!(ListChars::parse("tab:abc").is_err());
    }

    #[test]
    fn listchars_parse_invalid_unknown_key() {
        assert!(ListChars::parse("bogus:x").is_err());
    }

    #[test]
    fn listchars_parse_invalid_value_length() {
        // Single-glyph keys reject both empty and multi-char values.
        assert!(ListChars::parse("eol:").is_err());
        assert!(ListChars::parse("eol:ab").is_err());
    }

    #[test]
    fn listchars_parse_invalid_returns_err() {
        // All three error cases from the spec
        assert!(ListChars::parse("tab").is_err(), "no colon");
        assert!(ListChars::parse("tab:abc").is_err(), "3-char tab value");
        assert!(ListChars::parse("bogus:x").is_err(), "unknown key");
    }

    #[test]
    fn listchars_to_string_roundtrip() {
        let s = "tab:>-,space:·,trail:~,eol:¶,nbsp:_,extends:>,precedes:<";
        let lc1 = ListChars::parse(s).unwrap();
        let canonical = lc1.to_canonical_string();
        let lc2 = ListChars::parse(&canonical).unwrap();
        assert_eq!(lc1, lc2);
    }

    #[test]
    fn listchars_default_canonical_string() {
        assert_eq!(ListChars::default().to_canonical_string(), "tab:^I,eol:$");
    }

    #[test]
    fn listchars_default_matches_vim() {
        let lc = ListChars::default();
        assert_eq!(lc.tab_lead, '^');
        assert_eq!(lc.tab_fill, Some('I'));
        assert_eq!(lc.eol, Some('$'));
        assert_eq!(lc.space, None);
        assert_eq!(lc.trail, None);
        assert_eq!(lc.nbsp, None);
    }

    // ---- apply_listchars tests ----

    #[test]
    fn apply_listchars_off_returns_borrowed() {
        let lc = ListChars::default();
        let result = apply_listchars("hello world", &lc, false, 4);
        assert!(
            matches!(result, Cow::Borrowed(_)),
            "expected Borrowed when list=false"
        );
    }

    #[test]
    fn apply_listchars_tab_expansion() {
        // tab:>- at col 0 with tabstop=4 → ">---foo"
        let lc = ListChars::parse("tab:>-,eol:$").unwrap();
        let result = apply_listchars("\tfoo", &lc, true, 4);
        // tab at col 0 → 4 wide: '>' + '-' + '-' + '-', then "foo", then '$'
        assert_eq!(result.as_ref(), ">---foo$");
    }

    #[test]
    fn apply_listchars_tab_mid_line_alignment() {
        // tab at col 2 with tabstop=4 only spans the 2 cells up to col 4
        let lc = ListChars::parse("tab:>-").unwrap();
        let result = apply_listchars("ab\tc", &lc, true, 4);
        assert_eq!(result.as_ref(), "ab>-c");
    }

    #[test]
    fn apply_listchars_single_glyph_tab_pads_with_spaces() {
        // tab_fill=None: lead glyph, then plain spaces up to the tabstop
        let lc = ListChars::parse("tab:>").unwrap();
        let result = apply_listchars("a\tb", &lc, true, 4);
        assert_eq!(result.as_ref(), "a>  b");
    }

    #[test]
    fn apply_listchars_wide_char_advances_two_columns() {
        // '中' occupies cols 0-1, so the tab spans cols 2-3
        let lc = ListChars::parse("tab:>-").unwrap();
        let result = apply_listchars("中\tx", &lc, true, 4);
        assert_eq!(result.as_ref(), "中>-x");
    }

    #[test]
    fn apply_listchars_trail_substitution() {
        let lc = ListChars::parse("tab:>-,trail:·").unwrap();
        // eol=None so no eol marker; space=None so interior spaces stay as ' '
        let result = apply_listchars("foo   ", &lc, true, 4);
        assert_eq!(result.as_ref(), "foo···");
    }

    #[test]
    fn apply_listchars_trailing_falls_back_to_space() {
        // trail=None: trailing spaces use the `space` glyph
        let lc = ListChars::parse("tab:>-,space:·").unwrap();
        let result = apply_listchars("a b  ", &lc, true, 4);
        assert_eq!(result.as_ref(), "a·b··");
    }

    #[test]
    fn apply_listchars_eol_appended() {
        let lc = ListChars::parse("tab:>-,eol:¶").unwrap();
        let result = apply_listchars("foo", &lc, true, 4);
        assert_eq!(result.as_ref(), "foo¶");
    }

    #[test]
    fn apply_listchars_nbsp_substitution() {
        let lc = ListChars::parse("tab:>-,nbsp:_").unwrap();
        let result = apply_listchars("a\u{00a0}b", &lc, true, 4);
        assert_eq!(result.as_ref(), "a_b");
    }

    #[test]
    fn apply_listchars_combined() {
        let lc = ListChars::parse("tab:>-,space:·,trail:~,eol:¶,nbsp:_").unwrap();
        // line: tab, space, 'x', nbsp, trailing space
        let input = "\t x\u{00a0} ";
        let result = apply_listchars(input, &lc, true, 4);
        // tab at col 0 with tabstop=4 → ">---"
        // interior space → '·'
        // 'x' → 'x'
        // nbsp → '_'
        // trailing space → '~'
        // eol → '¶'
        assert_eq!(result.as_ref(), ">---·x_~¶");
    }
}
417}