/// Substitution characters used when rendering a line in "list" mode.
///
/// Every `Option` field is `None` when no substitution is configured for
/// that kind of character.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListChars {
/// Character emitted in the first display cell of an expanded tab.
pub tab_lead: char,
/// Character repeated over the remaining cells of an expanded tab;
/// when `None`, those cells are rendered as plain spaces.
pub tab_fill: Option<char>,
/// Replacement for an ordinary space (`' '`).
pub space: Option<char>,
/// Replacement for spaces inside the trailing whitespace run of a line;
/// the renderer falls back to `space` when this is `None`.
pub trail: Option<char>,
/// Character appended after the last character of the line.
pub eol: Option<char>,
/// Replacement for a no-break space (U+00A0).
pub nbsp: Option<char>,
/// Parsed and serialized under the key `extends`; not consulted by
/// `apply_listchars` in this file.
pub extends: Option<char>,
/// Parsed and serialized under the key `precedes`; not consulted by
/// `apply_listchars` in this file.
pub precedes: Option<char>,
}
impl Default for ListChars {
fn default() -> Self {
Self {
tab_lead: '^',
tab_fill: Some('I'),
space: None,
trail: None,
eol: Some('$'),
nbsp: None,
extends: None,
precedes: None,
}
}
}
impl ListChars {
    /// Parses a comma-separated `key:value` specification such as
    /// `"tab:>-,eol:$"`.
    ///
    /// Leading whitespace of each entry is ignored and empty entries are
    /// skipped. Keys that are not mentioned keep the parser's starting
    /// values: `tab` is `^I` and every other substitution (including `eol`)
    /// is disabled.
    ///
    /// # Errors
    ///
    /// Returns a message when an entry has no `:`, when the key is unknown,
    /// when `tab` is not given 1 or 2 characters, or when any other key is
    /// not given exactly 1 character.
    pub fn parse(s: &str) -> Result<Self, String> {
        let mut parsed = Self {
            tab_lead: '^',
            tab_fill: Some('I'),
            space: None,
            trail: None,
            eol: None,
            nbsp: None,
            extends: None,
            precedes: None,
        };

        let entries = s
            .split(',')
            .map(str::trim_start)
            .filter(|entry| !entry.is_empty());

        for part in entries {
            let (key, val) = match part.split_once(':') {
                Some(pair) => pair,
                None => return Err(format!("listchars: missing `:` in `{part}`")),
            };
            let glyphs: Vec<char> = val.chars().collect();

            // `tab` is the only key that takes a lead plus an optional fill.
            if key == "tab" {
                match glyphs.as_slice() {
                    &[lead] => {
                        parsed.tab_lead = lead;
                        parsed.tab_fill = None;
                    }
                    &[lead, fill] => {
                        parsed.tab_lead = lead;
                        parsed.tab_fill = Some(fill);
                    }
                    other => {
                        let n = other.len();
                        return Err(format!(
                            "listchars: `tab` value must be 1 or 2 chars, got {n}"
                        ));
                    }
                }
                continue;
            }

            // Every remaining key stores exactly one character; pick the
            // destination first so an unknown key is reported before the
            // value is validated.
            let slot = match key {
                "space" => &mut parsed.space,
                "trail" => &mut parsed.trail,
                "eol" => &mut parsed.eol,
                "nbsp" => &mut parsed.nbsp,
                "extends" => &mut parsed.extends,
                "precedes" => &mut parsed.precedes,
                other => {
                    return Err(format!("listchars: unknown key `{other}`"));
                }
            };
            *slot = Some(one_char(key, &glyphs)?);
        }

        Ok(parsed)
    }

    /// Serializes the configuration back into the `key:value,...` form
    /// accepted by [`ListChars::parse`].
    ///
    /// `tab` is always written first; the remaining keys follow in the fixed
    /// order `space`, `trail`, `eol`, `nbsp`, `extends`, `precedes`, and are
    /// omitted when unset.
    pub fn to_canonical_string(&self) -> String {
        let mut text = String::from("tab:");
        text.push(self.tab_lead);
        if let Some(fill) = self.tab_fill {
            text.push(fill);
        }

        let optional = [
            ("space", self.space),
            ("trail", self.trail),
            ("eol", self.eol),
            ("nbsp", self.nbsp),
            ("extends", self.extends),
            ("precedes", self.precedes),
        ];
        for &(key, value) in optional.iter() {
            if let Some(ch) = value {
                text.push(',');
                text.push_str(key);
                text.push(':');
                text.push(ch);
            }
        }
        text
    }
}
/// Extracts the single character of a `key:value` entry.
///
/// `key` is only used to build the error message returned when `chars` does
/// not hold exactly one character.
fn one_char(key: &str, chars: &[char]) -> Result<char, String> {
    if let [only] = chars {
        return Ok(*only);
    }
    let n = chars.len();
    Err(format!(
        "listchars: `{key}` value must be exactly 1 char, got {n}"
    ))
}
/// Renders `line` with whitespace made visible according to `lc`.
///
/// * `line` – the text of a single line, without its line terminator.
/// * `lc` – substitution characters to use.
/// * `list` – when `false`, `line` is returned unchanged and borrowed.
/// * `tabstop` – distance between tab stops in display cells. A value of `0`
///   is treated as `1` so that a misconfigured tabstop cannot cause a
///   division-by-zero panic.
///
/// When `list` is `true` the result is always an owned string in which:
///
/// * each tab becomes `lc.tab_lead` followed by `lc.tab_fill` (or spaces when
///   unset) up to the next tab stop;
/// * each space becomes `lc.space`, except that spaces in the trailing
///   whitespace run use `lc.trail` first and fall back to `lc.space`;
/// * each U+00A0 becomes `lc.nbsp` when set;
/// * `lc.eol`, when set, is appended at the end.
///
/// `lc.extends` and `lc.precedes` are not used by this function.
pub fn apply_listchars<'a>(
    line: &'a str,
    lc: &ListChars,
    list: bool,
    tabstop: usize,
) -> std::borrow::Cow<'a, str> {
    if !list {
        return std::borrow::Cow::Borrowed(line);
    }

    // `col % 0` would panic; the narrowest meaningful tab stop is one cell.
    let tabstop = tabstop.max(1);
    // Byte offset at which the trailing run of spaces/tabs begins.
    let trimmed_end = line.trim_end_matches([' ', '\t']).len();
    // Without a configured fill the remainder of a tab is shown as spaces.
    let tab_fill = lc.tab_fill.unwrap_or(' ');

    let mut out = String::with_capacity(line.len() + 8);
    // Display column of the next character, used to locate tab stops.
    let mut col: usize = 0;

    for (byte_idx, ch) in line.char_indices() {
        match ch {
            '\t' => {
                // Cells up to the next tab stop; always in `1..=tabstop`.
                let width = tabstop - (col % tabstop);
                out.push(lc.tab_lead);
                out.extend(std::iter::repeat(tab_fill).take(width - 1));
                col += width;
            }
            ' ' => {
                let is_trailing = byte_idx >= trimmed_end;
                let sub = if is_trailing {
                    lc.trail.or(lc.space)
                } else {
                    lc.space
                };
                out.push(sub.unwrap_or(' '));
                col += 1;
            }
            '\u{00a0}' => {
                out.push(lc.nbsp.unwrap_or('\u{00a0}'));
                col += 1;
            }
            other => {
                out.push(other);
                col += unicode_width(other);
            }
        }
    }

    if let Some(eol) = lc.eol {
        out.push(eol);
    }
    std::borrow::Cow::Owned(out)
}
/// Returns the number of display cells `ch` is assumed to occupy:
/// 2 when [`is_wide`] reports it as wide, otherwise 1.
#[inline]
fn unicode_width(ch: char) -> usize {
    // One cell for every character, plus one more for wide characters.
    1 + usize::from(is_wide(ch))
}
/// Reports whether `ch` falls in one of the code-point ranges this file
/// treats as double-width.
///
/// The ranges are a fixed, hand-written table.
/// NOTE(review): this is not the complete Unicode East Asian Width data —
/// for example U+1F600 is not covered — confirm whether that is intended.
#[inline]
fn is_wide(ch: char) -> bool {
    // Inclusive `(first, last)` code-point ranges, in ascending order.
    const WIDE_RANGES: &[(u32, u32)] = &[
        (0x1100, 0x115F),
        (0x2E80, 0x303E),
        (0x3041, 0x33BF),
        (0x33FF, 0xA4CF),
        (0xA960, 0xA97F),
        (0xAC00, 0xD7FF),
        (0xF900, 0xFAFF),
        (0xFE10, 0xFE1F),
        (0xFE30, 0xFE6F),
        (0xFF00, 0xFF60),
        (0xFFE0, 0xFFE6),
        (0x1B000, 0x1B0FF),
        (0x1F004, 0x1F004),
        (0x1F0CF, 0x1F0CF),
        (0x1F200, 0x1F2FF),
        (0x20000, 0x2A6DF),
        (0x2A700, 0x2CEAF),
        (0x2CEB0, 0x2EBEF),
        (0x30000, 0x3134F),
    ];

    let code_point = u32::from(ch);
    WIDE_RANGES
        .iter()
        .any(|&(first, last)| first <= code_point && code_point <= last)
}
/// Unit tests for `ListChars` parsing/serialization and `apply_listchars`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::borrow::Cow;

    // ---- ListChars::parse ------------------------------------------------

    #[test]
    fn listchars_parse_basic() {
        let lc = ListChars::parse("tab:>-,eol:$").unwrap();
        assert_eq!(lc.tab_lead, '>');
        assert_eq!(lc.tab_fill, Some('-'));
        assert_eq!(lc.eol, Some('$'));
        assert_eq!(lc.space, None);
        assert_eq!(lc.trail, None);
    }

    #[test]
    fn listchars_parse_all_keys() {
        let lc =
            ListChars::parse("tab:>-,space:·,trail:~,eol:¶,nbsp:_,extends:>,precedes:<").unwrap();
        assert_eq!(lc.tab_lead, '>');
        assert_eq!(lc.tab_fill, Some('-'));
        assert_eq!(lc.space, Some('·'));
        assert_eq!(lc.trail, Some('~'));
        assert_eq!(lc.eol, Some('¶'));
        assert_eq!(lc.nbsp, Some('_'));
        assert_eq!(lc.extends, Some('>'));
        assert_eq!(lc.precedes, Some('<'));
    }

    #[test]
    fn listchars_parse_utf8() {
        // The tab fill here is a literal space: only leading whitespace of
        // an entry is stripped by the parser.
        let lc = ListChars::parse("tab:→ ,eol:¬").unwrap();
        assert_eq!(lc.tab_lead, '→');
        assert_eq!(lc.tab_fill, Some(' '));
        assert_eq!(lc.eol, Some('¬'));
    }

    #[test]
    fn listchars_parse_invalid_no_colon() {
        assert!(ListChars::parse("tab").is_err());
    }

    #[test]
    fn listchars_parse_invalid_three_char_tab() {
        assert!(ListChars::parse("tab:abc").is_err());
    }

    #[test]
    fn listchars_parse_invalid_unknown_key() {
        assert!(ListChars::parse("bogus:x").is_err());
    }

    #[test]
    fn listchars_parse_invalid_returns_err() {
        assert!(ListChars::parse("tab").is_err(), "no colon");
        assert!(ListChars::parse("tab:abc").is_err(), "3-char tab value");
        assert!(ListChars::parse("bogus:x").is_err(), "unknown key");
    }

    // ---- serialization and defaults --------------------------------------

    #[test]
    fn listchars_to_string_roundtrip() {
        let s = "tab:>-,space:·,trail:~,eol:¶,nbsp:_,extends:>,precedes:<";
        let lc1 = ListChars::parse(s).unwrap();
        let canonical = lc1.to_canonical_string();
        let lc2 = ListChars::parse(&canonical).unwrap();
        assert_eq!(lc1, lc2);
    }

    #[test]
    fn listchars_default_matches_vim() {
        let lc = ListChars::default();
        assert_eq!(lc.tab_lead, '^');
        assert_eq!(lc.tab_fill, Some('I'));
        assert_eq!(lc.eol, Some('$'));
        assert_eq!(lc.space, None);
        assert_eq!(lc.trail, None);
        assert_eq!(lc.nbsp, None);
    }

    // ---- apply_listchars --------------------------------------------------

    #[test]
    fn apply_listchars_off_returns_borrowed() {
        let lc = ListChars::default();
        let result = apply_listchars("hello world", &lc, false, 4);
        assert!(
            matches!(result, Cow::Borrowed(_)),
            "expected Borrowed when list=false"
        );
    }

    #[test]
    fn apply_listchars_tab_expansion() {
        let lc = ListChars::parse("tab:>-,eol:$").unwrap();
        let result = apply_listchars("\tfoo", &lc, true, 4);
        assert_eq!(result.as_ref(), ">---foo$");
    }

    #[test]
    fn apply_listchars_trail_substitution() {
        let lc = ListChars::parse("tab:>-,trail:·").unwrap();
        // Three trailing spaces: each one must be replaced by the `trail`
        // character, matching the three `·` in the expected output.
        let result = apply_listchars("foo   ", &lc, true, 4);
        assert_eq!(result.as_ref(), "foo···");
    }

    #[test]
    fn apply_listchars_eol_appended() {
        let lc = ListChars::parse("tab:>-,eol:¶").unwrap();
        let result = apply_listchars("foo", &lc, true, 4);
        assert_eq!(result.as_ref(), "foo¶");
    }

    #[test]
    fn apply_listchars_nbsp_substitution() {
        let lc = ListChars::parse("tab:>-,nbsp:_").unwrap();
        let result = apply_listchars("a\u{00a0}b", &lc, true, 4);
        assert_eq!(result.as_ref(), "a_b");
    }

    #[test]
    fn apply_listchars_combined() {
        let lc = ListChars::parse("tab:>-,space:·,trail:~,eol:¶,nbsp:_").unwrap();
        // Tab, interior space, `x`, no-break space, one trailing space.
        let input = "\t x\u{00a0} ";
        let result = apply_listchars(input, &lc, true, 4);
        assert_eq!(result.as_ref(), ">---·x_~¶");
    }
}