Skip to main content

fresh_core/
text_property.rs

1//! Text properties for embedding metadata in text ranges
2//!
3//! This module provides Emacs-style text properties that allow embedding
4//! arbitrary metadata (like source locations, severity levels, etc.) in
5//! specific ranges of text. This is essential for virtual buffers where
6//! each line might represent a diagnostic, search result, or other structured data.
7
8use crate::api::OverlayOptions;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::ops::Range;
12
13/// A text property that associates metadata with a range of text
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ts_rs::TS)]
15#[ts(export)]
16pub struct TextProperty {
17    /// Start byte offset (inclusive)
18    pub start: usize,
19    /// End byte offset (exclusive)
20    pub end: usize,
21    /// Arbitrary properties as key-value pairs
22    #[ts(type = "Record<string, any>")]
23    pub properties: HashMap<String, serde_json::Value>,
24}
25
26impl TextProperty {
27    /// Create a new text property for a range
28    pub fn new(start: usize, end: usize) -> Self {
29        Self {
30            start,
31            end,
32            properties: HashMap::new(),
33        }
34    }
35
36    /// Add a property
37    pub fn with_property(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
38        self.properties.insert(key.into(), value);
39        self
40    }
41
42    /// Set multiple properties at once
43    pub fn with_properties(mut self, props: HashMap<String, serde_json::Value>) -> Self {
44        self.properties.extend(props);
45        self
46    }
47
48    /// Check if this property range contains a byte position
49    pub fn contains(&self, pos: usize) -> bool {
50        pos >= self.start && pos < self.end
51    }
52
53    /// Check if this property range overlaps with another range
54    pub fn overlaps(&self, range: &Range<usize>) -> bool {
55        self.start < range.end && self.end > range.start
56    }
57
58    /// Get a property value by key
59    pub fn get(&self, key: &str) -> Option<&serde_json::Value> {
60        self.properties.get(key)
61    }
62
63    /// Get a property as a specific type
64    pub fn get_as<T: for<'de> Deserialize<'de>>(&self, key: &str) -> Option<T> {
65        self.properties
66            .get(key)
67            .and_then(|v| serde_json::from_value(v.clone()).ok())
68    }
69}
70
71/// Unit for `InlineOverlay` `start` / `end` offsets.
72///
73/// Plugins emitting overlays for text whose byte/codepoint counts
74/// match (pure ASCII) can stay on the `Byte` default and avoid
75/// per-overlay UTF-8 arithmetic. Plugins working with text that
76/// may contain multi-byte characters can emit offsets in `Char`
77/// units and let the host convert them to byte offsets at
78/// consumption time — which is free in Rust against the entry's
79/// final text.
80#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, ts_rs::TS)]
81#[serde(rename_all = "camelCase")]
82#[ts(export, rename_all = "camelCase")]
83pub enum OffsetUnit {
84    /// UTF-8 byte offsets within the entry's text. Default.
85    #[default]
86    Byte,
87    /// Unicode codepoint (scalar value) offsets within the entry's
88    /// text. Converted to byte offsets at consumption time.
89    Char,
90}
91
92fn is_byte_unit(u: &OffsetUnit) -> bool {
93    matches!(u, OffsetUnit::Byte)
94}
95
96/// An inline overlay specifying styling for a sub-range within a text entry
97#[derive(Debug, Clone, Serialize, Deserialize, ts_rs::TS)]
98#[serde(rename_all = "camelCase")]
99#[ts(export, rename_all = "camelCase")]
100pub struct InlineOverlay {
101    /// Start offset within the entry's text. See `unit`.
102    pub start: usize,
103    /// End offset within the entry's text (exclusive). See `unit`.
104    pub end: usize,
105    /// Styling options for this range
106    #[ts(type = "Partial<OverlayOptions>")]
107    pub style: OverlayOptions,
108    /// Optional properties for this sub-range (e.g., click target metadata)
109    #[ts(type = "Record<string, any>")]
110    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
111    pub properties: HashMap<String, serde_json::Value>,
112    /// Unit for `start` / `end`. Defaults to `byte`.
113    #[serde(default, skip_serializing_if = "is_byte_unit")]
114    pub unit: OffsetUnit,
115}
116
117/// One styled segment of a `TextPropertyEntry` built via the
118/// `segments` field. Plugins use segments to describe row content
119/// structurally — a sequence of (text, optional style, optional
120/// nested overlays) — instead of pre-rendering the text and
121/// computing byte/char offsets for overlays themselves. The host
122/// concatenates segment text and emits the corresponding overlays
123/// during `normalize_widths`.
124#[derive(Debug, Clone, Serialize, Deserialize, ts_rs::TS)]
125#[serde(rename_all = "camelCase")]
126#[ts(export, rename_all = "camelCase")]
127pub struct StyledSegment {
128    /// Verbatim text for this segment.
129    pub text: String,
130    /// When set, the host emits an `InlineOverlay` covering this
131    /// segment's text in the final entry.
132    #[ts(type = "Partial<OverlayOptions>")]
133    #[serde(default, skip_serializing_if = "Option::is_none")]
134    pub style: Option<OverlayOptions>,
135    /// Additional overlays inside this segment. Offsets are in
136    /// the overlay's own `unit`, relative to the segment's start
137    /// (NOT the final entry text); the host shifts them by the
138    /// segment's position during concatenation.
139    #[serde(default, skip_serializing_if = "Vec::is_empty")]
140    pub overlays: Vec<InlineOverlay>,
141}
142
143/// An entry with text and its properties
144#[derive(Debug, Clone, Serialize, Deserialize, ts_rs::TS)]
145#[serde(rename_all = "camelCase")]
146#[ts(export, rename_all = "camelCase")]
147pub struct TextPropertyEntry {
148    /// The text content. When `segments` is non-empty `text` is
149    /// rebuilt from concatenating segment text during
150    /// `normalize_widths` and any value supplied here is replaced.
151    pub text: String,
152    /// Properties for this text
153    #[ts(type = "Record<string, any>")]
154    #[serde(default)]
155    pub properties: HashMap<String, serde_json::Value>,
156    /// Optional whole-entry styling
157    #[serde(default, skip_serializing_if = "Option::is_none")]
158    pub style: Option<OverlayOptions>,
159    /// Optional sub-range styling within this entry
160    #[serde(default, skip_serializing_if = "Vec::is_empty")]
161    pub inline_overlays: Vec<InlineOverlay>,
162    /// Optional segment list. When non-empty the host concatenates
163    /// segment text into `text` and pushes one `InlineOverlay`
164    /// (in `Char` units) per styled segment plus the segment's
165    /// nested `overlays` shifted by its position. Resolved before
166    /// truncate/pad/char-byte conversion in `normalize_widths`.
167    #[serde(default, skip_serializing_if = "Vec::is_empty")]
168    pub segments: Vec<StyledSegment>,
169    /// Pad `text` with spaces to this many display columns
170    /// (Unicode codepoints). No-op when `text` already has at
171    /// least this many codepoints. Applied before overlays are
172    /// resolved.
173    #[serde(default, skip_serializing_if = "Option::is_none")]
174    pub pad_to_chars: Option<u32>,
175    /// Truncate `text` to at most this many display columns
176    /// (Unicode codepoints). When the budget is greater than
177    /// 3 the truncated tail is replaced with `...`; when it is
178    /// 3 or less the text is cut at exactly the budget. Applied
179    /// before overlays are resolved.
180    #[serde(default, skip_serializing_if = "Option::is_none")]
181    pub truncate_to_chars: Option<u32>,
182}
183
184impl TextPropertyEntry {
185    /// Create a new entry with just text
186    pub fn text(text: impl Into<String>) -> Self {
187        Self {
188            text: text.into(),
189            properties: HashMap::new(),
190            style: None,
191            inline_overlays: Vec::new(),
192            segments: Vec::new(),
193            pad_to_chars: None,
194            truncate_to_chars: None,
195        }
196    }
197
198    /// Resolve `segments` (if any) into `text` plus inline overlays,
199    /// then apply `truncate_to_chars`, then `pad_to_chars`, then
200    /// convert any `unit: Char` overlays to byte offsets against the
201    /// resulting `text`. Idempotent: an entry with no segments,
202    /// pad/truncate hints, or char-unit overlays is left untouched.
203    ///
204    /// Truncation rounds the byte cut to a UTF-8 codepoint boundary.
205    /// Char-offset overlays beyond the resulting codepoint count are
206    /// clamped to that count.
207    pub fn normalize_widths(&mut self) {
208        if !self.segments.is_empty() {
209            // Segments are authoritative: replace any pre-existing
210            // `text`. Per-segment style becomes a Char-unit overlay
211            // covering the segment; nested overlays shift by the
212            // segment's start in their declared unit.
213            let segments = std::mem::take(&mut self.segments);
214            self.text.clear();
215            let mut char_cursor: usize = 0;
216            let mut byte_cursor: usize = 0;
217            for seg in segments {
218                let seg_chars = seg.text.chars().count();
219                let seg_bytes = seg.text.len();
220                if let Some(style) = seg.style {
221                    self.inline_overlays.push(InlineOverlay {
222                        start: char_cursor,
223                        end: char_cursor + seg_chars,
224                        style,
225                        properties: HashMap::new(),
226                        unit: OffsetUnit::Char,
227                    });
228                }
229                for mut o in seg.overlays {
230                    match o.unit {
231                        OffsetUnit::Char => {
232                            o.start += char_cursor;
233                            o.end += char_cursor;
234                        }
235                        OffsetUnit::Byte => {
236                            o.start += byte_cursor;
237                            o.end += byte_cursor;
238                        }
239                    }
240                    self.inline_overlays.push(o);
241                }
242                self.text.push_str(&seg.text);
243                char_cursor += seg_chars;
244                byte_cursor += seg_bytes;
245            }
246        }
247
248        if let Some(max_chars) = self.truncate_to_chars {
249            let max = max_chars as usize;
250            let cur = self.text.chars().count();
251            if cur > max {
252                if max <= 3 {
253                    let cut_byte = self
254                        .text
255                        .char_indices()
256                        .nth(max)
257                        .map(|(b, _)| b)
258                        .unwrap_or(self.text.len());
259                    self.text.truncate(cut_byte);
260                } else {
261                    let keep = max - 3;
262                    let cut_byte = self
263                        .text
264                        .char_indices()
265                        .nth(keep)
266                        .map(|(b, _)| b)
267                        .unwrap_or(self.text.len());
268                    self.text.truncate(cut_byte);
269                    self.text.push_str("...");
270                }
271            }
272        }
273
274        if let Some(min_chars) = self.pad_to_chars {
275            let cur = self.text.chars().count();
276            let target = min_chars as usize;
277            if target > cur {
278                let pad = target - cur;
279                self.text.reserve(pad);
280                for _ in 0..pad {
281                    self.text.push(' ');
282                }
283            }
284        }
285
286        let needs_conversion = self
287            .inline_overlays
288            .iter()
289            .any(|o| matches!(o.unit, OffsetUnit::Char));
290        if needs_conversion {
291            // Build a codepoint-index → byte-index lookup over the
292            // final text. One pass; subsequent overlay lookups are
293            // O(1) into the table.
294            let mut char_to_byte: Vec<usize> = self.text.char_indices().map(|(b, _)| b).collect();
295            char_to_byte.push(self.text.len());
296            for o in &mut self.inline_overlays {
297                if matches!(o.unit, OffsetUnit::Char) {
298                    let s = o.start.min(char_to_byte.len() - 1);
299                    let e = o.end.min(char_to_byte.len() - 1);
300                    o.start = char_to_byte[s];
301                    o.end = char_to_byte[e];
302                    o.unit = OffsetUnit::Byte;
303                }
304            }
305        }
306    }
307
308    /// Add a property
309    pub fn with_property(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
310        self.properties.insert(key.into(), value);
311        self
312    }
313
314    /// Set multiple properties
315    pub fn with_properties(mut self, props: HashMap<String, serde_json::Value>) -> Self {
316        self.properties = props;
317        self
318    }
319
320    /// Set whole-entry styling
321    pub fn with_style(mut self, style: OverlayOptions) -> Self {
322        self.style = Some(style);
323        self
324    }
325
326    /// Add a sub-range inline overlay
327    pub fn with_inline_overlay(mut self, start: usize, end: usize, style: OverlayOptions) -> Self {
328        self.inline_overlays.push(InlineOverlay {
329            start,
330            end,
331            style,
332            properties: HashMap::new(),
333            unit: OffsetUnit::Byte,
334        });
335        self
336    }
337
338    /// Push a styled segment. After `normalize_widths` runs, the
339    /// segment becomes part of `text` plus a Char-unit
340    /// `InlineOverlay` covering it (when `style` is set).
341    pub fn with_segment(mut self, text: impl Into<String>, style: Option<OverlayOptions>) -> Self {
342        self.segments.push(StyledSegment {
343            text: text.into(),
344            style,
345            overlays: Vec::new(),
346        });
347        self
348    }
349}
350
#[cfg(test)]
mod normalize_tests {
    use super::*;

    /// Plain entry with no hints, overlays, or segments.
    fn entry(text: &str) -> TextPropertyEntry {
        TextPropertyEntry::text(text)
    }

    /// Run `normalize_widths` on `e` and hand the result back.
    fn normalized(mut e: TextPropertyEntry) -> TextPropertyEntry {
        e.normalize_widths();
        e
    }

    /// Default-styled, property-less overlay over `start..end` in `unit`.
    fn overlay(start: usize, end: usize, unit: OffsetUnit) -> InlineOverlay {
        InlineOverlay {
            start,
            end,
            style: OverlayOptions::default(),
            properties: HashMap::new(),
            unit,
        }
    }

    /// Style used as a recognisable marker in the segment tests.
    fn bold_style() -> OverlayOptions {
        OverlayOptions {
            bold: true,
            ..Default::default()
        }
    }

    /// Segment without nested overlays; bold when `fg_marker_bold` is set.
    fn styled(text: &str, fg_marker_bold: bool) -> StyledSegment {
        StyledSegment {
            text: text.to_string(),
            style: fg_marker_bold.then(bold_style),
            overlays: Vec::new(),
        }
    }

    /// First bold overlay of `e`; panics with `what` when there is none.
    fn first_bold<'a>(e: &'a TextPropertyEntry, what: &str) -> &'a InlineOverlay {
        e.inline_overlays
            .iter()
            .find(|o| o.style.bold)
            .expect(what)
    }

    #[test]
    fn pad_to_chars_pads_short_ascii_text() {
        let e = normalized(TextPropertyEntry {
            pad_to_chars: Some(5),
            ..entry("hi")
        });
        assert_eq!(e.text, "hi   ");
    }

    #[test]
    fn pad_to_chars_is_noop_when_text_already_wider() {
        let e = normalized(TextPropertyEntry {
            pad_to_chars: Some(5),
            ..entry("longer than five")
        });
        assert_eq!(e.text, "longer than five");
    }

    #[test]
    fn pad_to_chars_counts_codepoints_not_bytes() {
        // Each 'é' occupies two UTF-8 bytes but is a single codepoint.
        let e = normalized(TextPropertyEntry {
            pad_to_chars: Some(4),
            ..entry("éé")
        });
        assert_eq!(e.text, "éé  ");
    }

    #[test]
    fn truncate_to_chars_appends_ellipsis_when_budget_over_three() {
        let e = normalized(TextPropertyEntry {
            truncate_to_chars: Some(6),
            ..entry("abcdefghij")
        });
        assert_eq!(e.text, "abc...");
    }

    #[test]
    fn truncate_to_chars_cuts_without_ellipsis_when_budget_three_or_less() {
        let e = normalized(TextPropertyEntry {
            truncate_to_chars: Some(3),
            ..entry("abcdef")
        });
        assert_eq!(e.text, "abc");
    }

    #[test]
    fn truncate_to_chars_respects_codepoint_boundary() {
        // A cut at byte 1 would land in the middle of the first 'é'.
        let e = normalized(TextPropertyEntry {
            truncate_to_chars: Some(2),
            ..entry("éééé")
        });
        assert_eq!(e.text, "éé");
    }

    #[test]
    fn truncate_then_pad_combines_correctly() {
        let e = normalized(TextPropertyEntry {
            truncate_to_chars: Some(6),
            pad_to_chars: Some(8),
            ..entry("abcdefghij")
        });
        assert_eq!(e.text, "abc...  ");
    }

    #[test]
    fn char_unit_overlay_converted_to_byte_offsets_against_ascii() {
        let e = normalized(TextPropertyEntry {
            inline_overlays: vec![overlay(6, 11, OffsetUnit::Char)],
            ..entry("hello world")
        });
        let o = &e.inline_overlays[0];
        assert_eq!((o.start, o.end), (6, 11));
        assert_eq!(o.unit, OffsetUnit::Byte);
    }

    #[test]
    fn char_unit_overlay_converted_to_byte_offsets_with_multibyte_chars() {
        // "éxé": é (2 bytes) + x (1 byte) + é (2 bytes) = 5 bytes, 3 codepoints.
        let e = normalized(TextPropertyEntry {
            inline_overlays: vec![overlay(1, 2, OffsetUnit::Char)],
            ..entry("éxé")
        });
        let o = &e.inline_overlays[0];
        assert_eq!((o.start, o.end), (2, 3));
        assert_eq!(o.unit, OffsetUnit::Byte);
        assert_eq!(&e.text[o.start..o.end], "x");
    }

    #[test]
    fn char_unit_overlay_after_pad_indexes_into_padded_text() {
        let e = normalized(TextPropertyEntry {
            pad_to_chars: Some(6),
            inline_overlays: vec![overlay(0, 6, OffsetUnit::Char)],
            ..entry("hi")
        });
        let o = &e.inline_overlays[0];
        assert_eq!((o.start, o.end), (0, 6));
    }

    #[test]
    fn char_unit_overlay_after_truncate_clamps_to_remaining_text() {
        // Text becomes "abc..."; the overlay end (100) overshoots and
        // must be clamped to the text length in codepoints.
        let e = normalized(TextPropertyEntry {
            truncate_to_chars: Some(6),
            inline_overlays: vec![overlay(0, 100, OffsetUnit::Char)],
            ..entry("abcdefghij")
        });
        let o = &e.inline_overlays[0];
        assert_eq!(o.start, 0);
        assert_eq!(o.end, e.text.len());
    }

    #[test]
    fn byte_unit_overlay_unchanged_by_normalize() {
        let e = normalized(TextPropertyEntry {
            inline_overlays: vec![overlay(1, 4, OffsetUnit::Byte)],
            ..entry("hello")
        });
        let o = &e.inline_overlays[0];
        assert_eq!((o.start, o.end), (1, 4));
        assert_eq!(o.unit, OffsetUnit::Byte);
    }

    #[test]
    fn segments_concatenate_into_text() {
        let e = normalized(TextPropertyEntry {
            segments: vec![
                styled("hello", false),
                styled(" ", false),
                styled("world", false),
            ],
            ..entry("ignored")
        });
        assert_eq!(e.text, "hello world");
        assert!(e.segments.is_empty(), "segments consumed");
    }

    #[test]
    fn styled_segments_emit_char_unit_overlays_for_styled_segments_only() {
        let e = normalized(TextPropertyEntry {
            segments: vec![
                styled("AB", false),
                styled("CD", true), // bold
                styled("EF", false),
                styled("GH", true), // bold
            ],
            ..entry("")
        });
        // All ASCII, so the char→byte conversion is the identity.
        assert_eq!(e.text, "ABCDEFGH");
        let bold: Vec<_> = e.inline_overlays.iter().filter(|o| o.style.bold).collect();
        assert_eq!(bold.len(), 2);
        assert_eq!((bold[0].start, bold[0].end), (2, 4));
        assert_eq!((bold[1].start, bold[1].end), (6, 8));
    }

    #[test]
    fn styled_segments_with_multibyte_text_emit_correct_byte_overlays() {
        // "éé" + "x" + "éé": chars [0..2, 2..3, 3..5], bytes [0..4, 4..5, 5..9].
        let e = normalized(TextPropertyEntry {
            segments: vec![styled("éé", false), styled("x", true), styled("éé", false)],
            ..entry("")
        });
        assert_eq!(e.text, "ééxéé");
        let bold = first_bold(&e, "styled middle segment");
        assert_eq!((bold.start, bold.end), (4, 5));
        assert_eq!(&e.text[bold.start..bold.end], "x");
    }

    #[test]
    fn segment_nested_overlays_shift_by_segment_position_in_their_unit() {
        let nested = InlineOverlay {
            style: bold_style(),
            ..overlay(1, 2, OffsetUnit::Char)
        };
        let e = normalized(TextPropertyEntry {
            segments: vec![
                StyledSegment {
                    text: "abc".to_string(),
                    style: None,
                    overlays: vec![],
                },
                StyledSegment {
                    text: "éé".to_string(),
                    style: None,
                    overlays: vec![nested],
                },
            ],
            ..entry("")
        });
        // Final text is "abcéé"; the second segment starts at char 3 / byte 3.
        // Nested [1..2] inside it maps to entry chars [4..5], i.e. the
        // second 'é' (char 4 = byte 5, char 5 = byte 7).
        let bold = first_bold(&e, "nested overlay");
        assert_eq!(&e.text[bold.start..bold.end], "é");
    }

    #[test]
    fn segments_then_pad_works() {
        let e = normalized(TextPropertyEntry {
            segments: vec![styled("ab", true)],
            pad_to_chars: Some(5),
            ..entry("")
        });
        assert_eq!(e.text, "ab   ");
        let bold = first_bold(&e, "segment overlay");
        assert_eq!((bold.start, bold.end), (0, 2));
    }

    #[test]
    fn segments_then_truncate_clamps_overlapping_overlay() {
        let e = normalized(TextPropertyEntry {
            segments: vec![styled("abcdefghij", true)],
            truncate_to_chars: Some(5),
            ..entry("")
        });
        // Budget above 3, so the result is "ab...".
        assert_eq!(e.text, "ab...");
        let bold = first_bold(&e, "segment overlay");
        // The overlay originally spanned chars [0..10]; it is clamped to
        // the new text length (5 codepoints = 5 bytes, all ASCII).
        assert_eq!(bold.end, e.text.len());
    }

    #[test]
    fn segments_replace_pre_existing_text() {
        let e = normalized(TextPropertyEntry {
            segments: vec![styled("only this", false)],
            ..entry("should be discarded")
        });
        assert_eq!(e.text, "only this");
    }
}