termgrid_core/
text.rs

1//! Styled text utilities.
2//!
3//! This module provides a small, generic representation of "styled spans" and
4//! helper functions for measuring and manipulating them in terms of terminal
5//! cell width.
6
7use crate::registry::GlyphRegistry;
8use crate::{style::is_plain_style, Style};
9use serde::{Deserialize, Serialize};
10
11/// A segment of text with a single style.
12///
13/// Invariants expected by helpers in this module:
14/// - `text` may be empty, but most helpers will drop empty spans.
15/// - Adjacent spans with identical `style` can be coalesced.
16#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
17pub struct Span {
18    pub text: String,
19    #[serde(default, skip_serializing_if = "is_plain_style")]
20    pub style: Style,
21}
22
23impl Span {
24    /// Construct a new span from text and style.
25    ///
26    /// Note: many helpers treat empty spans as non-semantic and will drop
27    /// them during normalization.
28    pub fn new<T: Into<String>>(text: T, style: Style) -> Self {
29        Self {
30            text: text.into(),
31            style,
32        }
33    }
34}
35
36/// A convenience alias for a sequence of styled spans.
37pub type Spans = Vec<Span>;
38
39/// Returns the concatenated plain-text content of spans.
40pub fn spans_plain_text(spans: &[Span]) -> String {
41    let mut out = String::new();
42    for s in spans {
43        out.push_str(&s.text);
44    }
45    out
46}
47
48/// Normalize spans by:
49/// - removing empty spans
50/// - coalescing adjacent spans with identical styles
51pub fn normalize_spans(spans: &[Span]) -> Vec<Span> {
52    let mut out: Vec<Span> = Vec::new();
53    for s in spans {
54        if s.text.is_empty() {
55            continue;
56        }
57        if let Some(last) = out.last_mut() {
58            if last.style == s.style {
59                last.text.push_str(&s.text);
60                continue;
61            }
62        }
63        out.push(s.clone());
64    }
65    out
66}
67
68/// Measure a plain string in terminal cells using the provided glyph registry.
69///
70/// This measures display width by iterating grapheme clusters and consulting
71/// the glyph registry for width policy.
72pub fn measure_cells_text(registry: &GlyphRegistry, text: &str) -> usize {
73    use unicode_segmentation::UnicodeSegmentation;
74    let mut w: usize = 0;
75    for g in text.graphemes(true) {
76        w = w.saturating_add(registry.width(g) as usize);
77    }
78    w
79}
80
81/// Measure spans in terminal cells using the provided glyph registry.
82pub fn measure_cells_spans(registry: &GlyphRegistry, spans: &[Span]) -> usize {
83    spans
84        .iter()
85        .map(|s| measure_cells_text(registry, &s.text))
86        .sum()
87}
88
89fn clip_text_to_cells_internal(
90    registry: &GlyphRegistry,
91    text: &str,
92    max_cells: usize,
93) -> (String, usize, bool) {
94    use unicode_segmentation::UnicodeSegmentation;
95
96    if max_cells == 0 || text.is_empty() {
97        return (String::new(), 0, !text.is_empty());
98    }
99
100    let mut out = String::new();
101    let mut used: usize = 0;
102    let mut clipped = false;
103
104    for g in text.graphemes(true) {
105        let gw = registry.width(g) as usize;
106        if used.saturating_add(gw) > max_cells {
107            clipped = true;
108            break;
109        }
110        used = used.saturating_add(gw);
111        out.push_str(g);
112    }
113
114    // If we did not iterate all graphemes, we clipped.
115    if !clipped {
116        // Detect remaining content without re-walking too much.
117        // If the output differs from the input, we clipped.
118        if out.len() != text.len() {
119            clipped = true;
120        }
121    }
122
123    (out, used, clipped)
124}
125
126/// Clip a plain string to at most `w` cells.
127///
128/// Returns the clipped string and whether clipping occurred.
129pub fn clip_to_cells_text(registry: &GlyphRegistry, text: &str, w: usize) -> (String, bool) {
130    let (out, _used, clipped) = clip_text_to_cells_internal(registry, text, w);
131    (out, clipped)
132}
133
134/// Clip spans to at most `w` cells.
135///
136/// Returns clipped spans and whether clipping occurred.
137pub fn clip_to_cells_spans(
138    registry: &GlyphRegistry,
139    spans: &[Span],
140    w: usize,
141) -> (Vec<Span>, bool) {
142    if w == 0 {
143        return (Vec::new(), !spans.is_empty());
144    }
145
146    let mut out: Vec<Span> = Vec::new();
147    let mut used: usize = 0;
148    let mut clipped = false;
149
150    for s in spans {
151        if s.text.is_empty() {
152            continue;
153        }
154        let remaining = w.saturating_sub(used);
155        if remaining == 0 {
156            clipped = true;
157            break;
158        }
159        let (clipped_text, text_used, did_clip) =
160            clip_text_to_cells_internal(registry, &s.text, remaining);
161        if !clipped_text.is_empty() {
162            out.push(Span::new(clipped_text, s.style));
163        }
164        used = used.saturating_add(text_used);
165        if did_clip {
166            clipped = true;
167            break;
168        }
169    }
170
171    (normalize_spans(&out), clipped)
172}
173
174/// Ellipsize a plain string to at most `w` cells using the provided `ellipsis`.
175///
176/// If the `ellipsis` itself does not fit, it will be clipped to `w`.
177pub fn ellipsis_to_cells_text(
178    registry: &GlyphRegistry,
179    text: &str,
180    w: usize,
181    ellipsis: &str,
182) -> String {
183    if w == 0 {
184        return String::new();
185    }
186    if measure_cells_text(registry, text) <= w {
187        return text.to_string();
188    }
189
190    let ell_w = measure_cells_text(registry, ellipsis);
191    if ell_w >= w {
192        let (e, _clipped) = clip_to_cells_text(registry, ellipsis, w);
193        return e;
194    }
195
196    let avail = w.saturating_sub(ell_w);
197    let (prefix, _clipped) = clip_to_cells_text(registry, text, avail);
198    let mut out = prefix;
199    out.push_str(ellipsis);
200    out
201}
202
203/// Ellipsize spans to at most `w` cells using the provided `ellipsis_span`.
204///
205/// If the ellipsis span does not fit, it will be clipped to `w`.
206pub fn ellipsis_to_cells_spans(
207    registry: &GlyphRegistry,
208    spans: &[Span],
209    w: usize,
210    ellipsis_span: &Span,
211) -> Vec<Span> {
212    if w == 0 {
213        return Vec::new();
214    }
215
216    if measure_cells_spans(registry, spans) <= w {
217        return normalize_spans(spans);
218    }
219
220    let ell_w = measure_cells_text(registry, &ellipsis_span.text);
221    if ell_w >= w {
222        let (t, _clipped) = clip_to_cells_text(registry, &ellipsis_span.text, w);
223        return normalize_spans(&[Span::new(t, ellipsis_span.style)]);
224    }
225
226    let avail = w.saturating_sub(ell_w);
227    let (mut prefix, _clipped) = clip_to_cells_spans(registry, spans, avail);
228    prefix.push(Span::new(ellipsis_span.text.clone(), ellipsis_span.style));
229    normalize_spans(&prefix)
230}
231
232/// Options controlling span-aware word wrapping.
233///
234/// These settings are intentionally generic and suitable for help viewers,
235/// log viewers, search UIs, and other terminal applications.
236#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
237#[serde(default)]
238pub struct WrapOpts {
239    /// Preserve whitespace runs exactly as provided.
240    ///
241    /// When false, whitespace runs are normalized to a single ASCII space, and
242    /// leading whitespace at the beginning of a visual line is dropped.
243    pub preserve_spaces: bool,
244
245    /// If a single non-whitespace token exceeds the available width, hard-break
246    /// it by grapheme cluster.
247    pub hard_break_long_tokens: bool,
248
249    /// Trim trailing whitespace at the end of each visual line.
250    pub trim_end: bool,
251
252    /// Optional prefix to apply to continuation lines produced by wrapping.
253    ///
254    /// A continuation line is a visual line created because a token would
255    /// exceed the available `width` and the line is wrapped. Explicit newline
256    /// characters do not produce continuation lines.
257    ///
258    /// The prefix is only applied if its measured cell width is strictly less
259    /// than `width`.
260    #[serde(default, skip_serializing_if = "Option::is_none")]
261    pub continuation_prefix: Option<Vec<Span>>,
262}
263
264impl Default for WrapOpts {
265    fn default() -> Self {
266        Self {
267            preserve_spaces: false,
268            hard_break_long_tokens: true,
269            trim_end: true,
270            continuation_prefix: None,
271        }
272    }
273}
274
275pub(crate) fn is_default_wrap_opts(o: &WrapOpts) -> bool {
276    *o == WrapOpts::default()
277}
278
/// Classification of a [`Token`] produced while tokenizing spans for wrapping.
///
/// The enum carries no data, so it is `Copy` and can be passed and compared by
/// value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum TokenKind {
    /// A run of non-whitespace characters.
    Text,
    /// A run of whitespace characters (excluding `'\n'`).
    Space,
    /// An explicit `'\n'` line break.
    Newline,
}
285
286#[derive(Clone, Debug, PartialEq, Eq)]
287struct Token {
288    text: String,
289    style: Style,
290    kind: TokenKind,
291}
292
293fn tokenize_spans(spans: &[Span], preserve_spaces: bool) -> Vec<Token> {
294    let spans = normalize_spans(spans);
295    let mut out: Vec<Token> = Vec::new();
296
297    for s in spans {
298        // Split on explicit newlines. Newlines are always hard line breaks.
299        let mut first = true;
300        for part in s.text.split('\n') {
301            if !first {
302                out.push(Token {
303                    text: "\n".to_string(),
304                    style: s.style,
305                    kind: TokenKind::Newline,
306                });
307            }
308            first = false;
309
310            if part.is_empty() {
311                continue;
312            }
313
314            // Split into runs of whitespace vs non-whitespace.
315            let mut buf = String::new();
316            let mut in_space: Option<bool> = None;
317            for ch in part.chars() {
318                let is_space = ch.is_whitespace();
319                match in_space {
320                    None => {
321                        in_space = Some(is_space);
322                        buf.push(ch);
323                    }
324                    Some(prev) if prev == is_space => {
325                        buf.push(ch);
326                    }
327                    Some(prev) => {
328                        // flush
329                        if prev {
330                            let txt = if preserve_spaces {
331                                buf.clone()
332                            } else {
333                                " ".to_string()
334                            };
335                            out.push(Token {
336                                text: txt,
337                                style: s.style,
338                                kind: TokenKind::Space,
339                            });
340                        } else {
341                            out.push(Token {
342                                text: buf.clone(),
343                                style: s.style,
344                                kind: TokenKind::Text,
345                            });
346                        }
347                        buf.clear();
348                        in_space = Some(is_space);
349                        buf.push(ch);
350                    }
351                }
352            }
353
354            if !buf.is_empty() {
355                if in_space.unwrap_or(false) {
356                    let txt = if preserve_spaces {
357                        buf
358                    } else {
359                        " ".to_string()
360                    };
361                    out.push(Token {
362                        text: txt,
363                        style: s.style,
364                        kind: TokenKind::Space,
365                    });
366                } else {
367                    out.push(Token {
368                        text: buf,
369                        style: s.style,
370                        kind: TokenKind::Text,
371                    });
372                }
373            }
374        }
375    }
376
377    out
378}
379
380fn trim_trailing_spaces(registry: &GlyphRegistry, spans: &mut Vec<Span>) {
381    // Remove trailing whitespace spans, but do not split graphemes in the middle.
382    // This function is conservative: it trims ASCII whitespace at the end.
383    // If a caller wants more aggressive trimming, do it before constructing spans.
384    while let Some(last) = spans.last_mut() {
385        if last.text.is_empty() {
386            spans.pop();
387            continue;
388        }
389        // Trim only ASCII spaces/tabs at the end.
390        let trimmed = last.text.trim_end_matches(&[' ', '\t', '\r'] as &[char]);
391        if trimmed.len() == last.text.len() {
392            break;
393        }
394        last.text = trimmed.to_string();
395        if last.text.is_empty() {
396            spans.pop();
397        }
398    }
399
400    // Re-normalize to coalesce styles after trimming.
401    let norm = normalize_spans(spans);
402    spans.clear();
403    spans.extend(norm);
404
405    // Keep registry referenced to avoid unused warnings under some feature sets.
406    let _ = registry;
407}
408
409fn push_token(line: &mut Vec<Span>, tok: &Token) {
410    if tok.text.is_empty() {
411        return;
412    }
413    line.push(Span::new(tok.text.clone(), tok.style));
414}
415
416fn push_span_text(line: &mut Vec<Span>, text: &str, style: Style) {
417    if text.is_empty() {
418        return;
419    }
420    line.push(Span::new(text.to_string(), style));
421}
422
423fn split_token_by_width<'a>(
424    registry: &GlyphRegistry,
425    text: &'a str,
426    max_cells: usize,
427) -> (String, &'a str) {
428    let (chunk, used, _clipped) = clip_text_to_cells_internal(registry, text, max_cells);
429    if chunk.is_empty() || used == 0 {
430        return (String::new(), text);
431    }
432    let rest = &text[chunk.len()..];
433    (chunk, rest)
434}
435
436fn hard_break_token(registry: &GlyphRegistry, tok: &Token, width: usize) -> Vec<Vec<Span>> {
437    // Break a single token into multiple visual lines by grapheme cluster.
438    // Each produced line contains a single span with the token's style.
439    let mut lines: Vec<Vec<Span>> = Vec::new();
440    let mut remaining = tok.text.as_str();
441
442    loop {
443        if remaining.is_empty() {
444            break;
445        }
446        let (chunk, used, _clipped) = clip_text_to_cells_internal(registry, remaining, width);
447        if chunk.is_empty() || used == 0 {
448            // Defensive: avoid infinite loop if width is too small for any grapheme.
449            break;
450        }
451        lines.push(vec![Span::new(chunk.clone(), tok.style)]);
452        // Advance remaining by chunk length in bytes.
453        remaining = &remaining[chunk.len()..];
454    }
455
456    lines
457}
458
459/// Wrap spans word-wise into a sequence of visual lines.
460///
461/// - Word boundaries are whitespace runs.
462/// - Newlines always force a hard line break.
463/// - When `opts.preserve_spaces` is false, whitespace is normalized to a single
464///   ASCII space and leading spaces at the start of a line are dropped.
465/// - When a single token exceeds `width` and `opts.hard_break_long_tokens` is
466///   true, the token is hard-broken by grapheme cluster.
467pub fn wrap_spans_wordwise(
468    registry: &GlyphRegistry,
469    spans: &[Span],
470    width: usize,
471    opts: &WrapOpts,
472) -> Vec<Vec<Span>> {
473    if width == 0 {
474        return Vec::new();
475    }
476
477    let tokens = tokenize_spans(spans, opts.preserve_spaces);
478    let mut q: std::collections::VecDeque<Token> = tokens.into();
479
480    let mut lines: Vec<Vec<Span>> = Vec::new();
481    let mut line: Vec<Span> = Vec::new();
482    let mut line_w: usize = 0;
483
484    let begin_line = |line: &mut Vec<Span>, line_w: &mut usize, continuation: bool| {
485        if !continuation {
486            return;
487        }
488        let Some(prefix) = &opts.continuation_prefix else {
489            return;
490        };
491        let mut p = normalize_spans(prefix);
492        if p.is_empty() {
493            return;
494        }
495        let pw = measure_cells_spans(registry, &p);
496        // Only apply when the prefix fits and leaves at least 1 cell.
497        if pw >= width {
498            return;
499        }
500        line.append(&mut p);
501        *line_w = line_w.saturating_add(pw);
502    };
503
504    let flush_line = |lines: &mut Vec<Vec<Span>>, line: &mut Vec<Span>, line_w: &mut usize| {
505        let mut out = normalize_spans(line);
506        if opts.trim_end {
507            trim_trailing_spaces(registry, &mut out);
508        }
509        lines.push(out);
510        line.clear();
511        *line_w = 0;
512    };
513
514    while let Some(tok) = q.pop_front() {
515        match tok.kind {
516            TokenKind::Newline => {
517                flush_line(&mut lines, &mut line, &mut line_w);
518                // Explicit newlines reset continuation.
519            }
520            TokenKind::Space => {
521                if !opts.preserve_spaces {
522                    // drop leading spaces
523                    if line.is_empty() {
524                        continue;
525                    }
526                }
527                let tok_w = measure_cells_text(registry, &tok.text);
528                if line_w.saturating_add(tok_w) > width {
529                    flush_line(&mut lines, &mut line, &mut line_w);
530                    begin_line(&mut line, &mut line_w, true);
531                    // Discard the wrapping whitespace token. This matches
532                    // typical word-wrap semantics and avoids leading separators
533                    // on continuation lines.
534                } else {
535                    push_token(&mut line, &tok);
536                    line_w = line_w.saturating_add(tok_w);
537                }
538            }
539            TokenKind::Text => {
540                let tok_w = measure_cells_text(registry, &tok.text);
541
542                if tok_w > width {
543                    if opts.hard_break_long_tokens {
544                        // If there is remaining space on the current line,
545                        // fill it with the first chunk of this token.
546                        if !line.is_empty() && line_w < width {
547                            let avail = width - line_w;
548                            let (first, rest) = split_token_by_width(registry, &tok.text, avail);
549                            if !first.is_empty() {
550                                push_span_text(&mut line, &first, tok.style);
551                                line_w =
552                                    line_w.saturating_add(measure_cells_text(registry, &first));
553                            }
554                            flush_line(&mut lines, &mut line, &mut line_w);
555                            if !rest.is_empty() {
556                                q.push_front(Token {
557                                    text: rest.to_string(),
558                                    style: tok.style,
559                                    kind: TokenKind::Text,
560                                });
561                            }
562                            continue;
563                        }
564
565                        if !line.is_empty() {
566                            flush_line(&mut lines, &mut line, &mut line_w);
567                        }
568
569                        let broken = hard_break_token(registry, &tok, width);
570                        for (i, b) in broken.into_iter().enumerate() {
571                            if i == 0 {
572                                lines.push(normalize_spans(&b));
573                                continue;
574                            }
575                            // Continuation lines from hard-break may be prefixed.
576                            if opts.continuation_prefix.is_some() {
577                                let mut out: Vec<Span> = Vec::new();
578                                let mut out_w: usize = 0;
579                                begin_line(&mut out, &mut out_w, true);
580                                out.extend(normalize_spans(&b));
581                                lines.push(normalize_spans(&out));
582                            } else {
583                                lines.push(normalize_spans(&b));
584                            }
585                        }
586                        continue;
587                    } else {
588                        if !line.is_empty() {
589                            flush_line(&mut lines, &mut line, &mut line_w);
590                        }
591                        let (clipped, _did) = clip_to_cells_text(registry, &tok.text, width);
592                        if !clipped.is_empty() {
593                            lines.push(vec![Span::new(clipped, tok.style)]);
594                        }
595                        continue;
596                    }
597                }
598
599                if line_w.saturating_add(tok_w) > width {
600                    flush_line(&mut lines, &mut line, &mut line_w);
601                    begin_line(&mut line, &mut line_w, true);
602                }
603                push_token(&mut line, &tok);
604                line_w = line_w.saturating_add(tok_w);
605            }
606        }
607    }
608
609    if !line.is_empty() || lines.is_empty() {
610        flush_line(&mut lines, &mut line, &mut line_w);
611    }
612
613    lines
614}
615
616// -----------------------------------------------------------------------------
617// Highlighting helpers
618// -----------------------------------------------------------------------------
619
620/// Apply highlighting to spans using grapheme-index ranges.
621///
622/// Ranges are expressed as half-open intervals `(start, end)` in **grapheme
623/// indices** over the concatenated plain text (`spans_plain_text`).
624///
625/// Highlighting is applied by *overlaying* the provided `highlight_style` onto
626/// the base span style:
627/// - `fg` / `bg` fields from `highlight_style` replace the base fields when
628///   they are `Some`.
629/// - `dim`, `bold`, `italic`, `underline`, `blink`, `inverse`, and `strike` are ORed with the base flags.
630///
631/// Invalid ranges (where `start >= end`) are ignored.
632pub fn apply_highlight(
633    spans: &[Span],
634    ranges: &[(usize, usize)],
635    highlight_style: Style,
636) -> Vec<Span> {
637    use unicode_segmentation::UnicodeSegmentation;
638
639    if spans.is_empty() || ranges.is_empty() {
640        return normalize_spans(spans);
641    }
642
643    // Normalize and merge ranges (sorted, non-overlapping).
644    let mut rs: Vec<(usize, usize)> = ranges.iter().copied().filter(|(s, e)| s < e).collect();
645    if rs.is_empty() {
646        return normalize_spans(spans);
647    }
648    rs.sort_by_key(|(s, _e)| *s);
649    let mut merged: Vec<(usize, usize)> = Vec::new();
650    for (s, e) in rs {
651        match merged.last_mut() {
652            None => merged.push((s, e)),
653            Some((_ls, le)) => {
654                if s <= *le {
655                    *le = (*le).max(e);
656                } else {
657                    merged.push((s, e));
658                }
659            }
660        }
661    }
662
663    let spans = normalize_spans(spans);
664    let mut out: Vec<Span> = Vec::new();
665
666    let mut global_g: usize = 0;
667    let mut r_idx: usize = 0;
668
669    for s in spans {
670        if s.text.is_empty() {
671            continue;
672        }
673
674        for g in s.text.graphemes(true) {
675            // Advance current range if we've passed it.
676            while r_idx < merged.len() && global_g >= merged[r_idx].1 {
677                r_idx += 1;
678            }
679
680            let in_range = if r_idx < merged.len() {
681                let (rs, re) = merged[r_idx];
682                global_g >= rs && global_g < re
683            } else {
684                false
685            };
686
687            let style = if in_range {
688                s.style.overlay(highlight_style)
689            } else {
690                s.style
691            };
692
693            // Append grapheme with appropriate style, coalescing where possible.
694            if let Some(last) = out.last_mut() {
695                if last.style == style {
696                    last.text.push_str(g);
697                } else {
698                    out.push(Span::new(g.to_string(), style));
699                }
700            } else {
701                out.push(Span::new(g.to_string(), style));
702            }
703
704            global_g = global_g.saturating_add(1);
705        }
706    }
707
708    normalize_spans(&out)
709}
termgrid_core/text.rs

termgrid_core/
text.rs