kimun_notes/components/text_editor/
parse_incremental.rs

1#![allow(dead_code)]
2//! Incremental-parse machinery: line-construct classification cache,
3//! damage-diff against the previous buffer snapshot, safe-boundary
4//! widening, and fence-range derivation. Pure functions only — no
5//! `pulldown_cmark` calls (those live in `markdown.rs`).
6
7use std::ops::Range;
8
/// Coarse, per-line structural classification that drives safe-boundary
/// widening.
///
/// A line counts as a *safe boundary* when re-parsing a slice that ends on
/// it gives the same result as the matching slice of a full-buffer parse.
/// `Blank` and `Plain` are unconditional boundaries as long as their
/// neighbour is also `Blank`/`Plain` or the end of the buffer. Structural
/// markers (`FenceMarker`, `ListMarker`, etc.) are NEVER boundaries:
/// widening has to reach the outer terminator of the construct they belong
/// to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineConstructKind {
    /// Empty line (`""`).
    Blank,
    /// Ordinary paragraph text. Also covers a paragraph line that contains
    /// inline HTML and the text line sitting above a setext underline.
    Plain,
    /// Opening or closing delimiter line of a fenced code block.
    FenceMarker,
    /// Line between the two delimiter lines of a fenced code block.
    FenceContent,
    /// Line of an indented code block (e.g. `"    let x = 1;"` after a
    /// blank line).
    IndentedCode,
    /// Line that opens a list item (e.g. `"- item"`).
    ListMarker,
    /// Indented follow-up line of a list item (e.g. `"  continuation"`
    /// directly under `"- item"`).
    ListContinuation,
    /// Blockquote line; the payload is the nesting depth (`">> two"` is
    /// depth 2).
    Blockquote(u8),
    /// Underline row of a setext heading (e.g. `"====="`).
    SetextUnderline,
    /// Line belonging to a block-level HTML element (e.g. `"<div>"`).
    HtmlBlock,
    /// ATX heading line (e.g. `"# title"`).
    Heading,
}
31
/// Outcome of widening a damaged row range out to safe construct
/// boundaries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WidenResult {
    /// The widened row range. The caller hands it to
    /// `ParsedBuffer::parse_range`.
    Widened(Range<usize>),
    /// The range cannot be widened cheaply (a cap tripped, or the construct
    /// is unbounded). The caller falls back to `ParsedBuffer::parse(lines)`.
    FullRebuild,
}
41
42/// Maximum fraction of buffer the widened range may cover before we
43/// abandon incremental and fall back to a full parse. Half the buffer
44/// is the empirical cross-over where parse+splice overhead exceeds a
45/// fresh full parse on the same input.
46pub(super) const MAX_INCREMENTAL_FRACTION: f32 = 0.5;
47
48/// Absolute cap on the widened range. Independent of buffer size; keeps
49/// large-fence edits bounded even on small buffers.
50pub(super) const MAX_INCREMENTAL_LINES: usize = 256;
51
52/// Cursor-row hint scan window for `compute_damage_range`. Empirically
53/// covers single-character edits, IME composition of up to 3 graphemes,
54/// and one Enter at line end. Multi-line pastes intentionally fall
55/// through to the LCP/LCS slow path.
56pub(super) const CURSOR_HINT_WINDOW: usize = 4;
57
58/// Compute the row range that differs between `old` and `new`, with a
59/// cursor-row hint to accelerate the common single-character-edit case.
60///
61/// **Contract:** `cursor_row` must be the row that was actually edited
62/// (the editor's cursor position after the keystroke). The fast path
63/// trusts this — if `cursor_row` does not identify the real edit point,
64/// the function may under-report the damaged range for an edit shape
65/// that single-keystroke editing cannot produce. Distant simultaneous
66/// edits are out of scope; they can only happen via programmatic
67/// buffer replacement, which goes through `set_text` and bumps
68/// `text_revision` such that the LCP/LCS slow path is taken naturally
69/// (the cursor row's content will match between old and new, so the
70/// fast path declines and the slow path runs).
71///
72/// Returns `None` when the buffers are byte-identical (defensive
73/// guard — callers should already have gated on `text_revision`).
74///
75/// Fast path: same line count, the row at `cursor_row` differs, and
76/// no other line in `±CURSOR_HINT_WINDOW` differs. Returns
77/// `Some(cursor_row..cursor_row + 1)`. O(`CURSOR_HINT_WINDOW`).
78///
79/// Slow path: longest common prefix (LCP) and longest common suffix
80/// (LCS); damaged range is the middle slice. O(min(buffer_size,
81/// damage_size)).
82pub fn compute_damage_range(
83    old: &[String],
84    new: &[String],
85    cursor_row: usize,
86) -> Option<Range<usize>> {
87    if old == new {
88        return None;
89    }
90
91    // Fast path: same line count, cursor row differs, no other diff in window.
92    if old.len() == new.len() && cursor_row < old.len() && old[cursor_row] != new[cursor_row] {
93        let lo = cursor_row.saturating_sub(CURSOR_HINT_WINDOW);
94        let hi = (cursor_row + CURSOR_HINT_WINDOW + 1).min(old.len());
95        let other_diff_in_window = (lo..hi).any(|i| i != cursor_row && old[i] != new[i]);
96        if !other_diff_in_window {
97            return Some(cursor_row..cursor_row + 1);
98        }
99    }
100
101    // Slow path: longest common prefix + suffix. O(buffer_len)
102    // String equalities; each compare is a length check + at most one
103    // SIMD memcmp on the first-differing byte. ~14µs on a 5000-line
104    // buffer for a single-row backspace.
105    //
106    // A cursor-anchored bound was explored as perf #12 and rejected:
107    //  - Capping the scan at `cursor_row + slack` saves nothing,
108    //    because the scan naturally stops at the first-differing
109    //    row, which IS `cursor_row` for keystroke-driven edits.
110    //  - Starting the LCP scan at `cursor_row - slack` (trusting
111    //    rows above to be unchanged) would skip the prefix scan but
112    //    introduces silent miscompilation risk on edits whose actual
113    //    diff is far from the cursor (paste, undo, programmatic
114    //    edit) — the post-slice verify only checks rows WITHIN the
115    //    widened range, so a misidentified damage range outside
116    //    that range is not caught.
117    //  - Maintaining per-row hashes alongside `lines_snapshot` would
118    //    let us replace string compares with u64 compares, but
119    //    requires plumbing damage hints from the editor's edit
120    //    surface to view.update for incremental hash maintenance —
121    //    bigger change than the 10µs win justifies.
122    //
123    // Until per-row hashes ship as part of a broader edit-surface
124    // refactor, the full O(buffer) scan stays.
125    let lcp = old
126        .iter()
127        .zip(new.iter())
128        .take_while(|(a, b)| a == b)
129        .count();
130    let lcs = old
131        .iter()
132        .rev()
133        .zip(new.iter().rev())
134        .take_while(|(a, b)| a == b)
135        .count();
136    // Guard against overlap when both buffers share a long common stretch.
137    // Clamp lcs so the resulting range is non-empty and start <= end.
138    let new_end = new.len().saturating_sub(lcs);
139    let old_end = old.len().saturating_sub(lcs);
140    let start = lcp.min(new_end).min(old_end);
141    let end = new_end.max(start);
142    Some(start..end)
143}
144
145/// Return true when `kind` is a self-contained, safe boundary line.
146/// Blank lines and ordinary paragraph lines are safe; everything else
147/// belongs to a multi-line construct that widening must include in
148/// full.
149fn is_safe_boundary(kind: LineConstructKind) -> bool {
150    matches!(kind, LineConstructKind::Blank | LineConstructKind::Plain)
151}
152
153/// Walk upward from `damaged_start` (the first damaged row) until the
154/// row just above is a safe boundary. Returns the new start row
155/// (inclusive).
156///
157/// `ListMarker` and `ListContinuation` are non-safe, so the walk
158/// passes through them automatically — landing on the safe row above
159/// the outermost list (Blank, or Plain that is not a continuation),
160/// which is the G1-required outermost-list-ancestor stopping point.
161fn widen_up(kinds: &[LineConstructKind], damaged_start: usize) -> usize {
162    let mut row = damaged_start;
163    while row > 0 {
164        let candidate = row - 1;
165        if is_safe_boundary(kinds[candidate]) {
166            return candidate;
167        }
168        row = candidate;
169    }
170    0
171}
172
173/// Walk downward from `damaged.end` (the first row past the damage)
174/// until we land on a safe boundary or end of buffer. Returns the
175/// exclusive end index.
176fn widen_down(kinds: &[LineConstructKind], damaged_end: usize) -> usize {
177    let mut row = damaged_end;
178    while row < kinds.len() {
179        if is_safe_boundary(kinds[row]) {
180            return row + 1;
181        }
182        row += 1;
183    }
184    kinds.len()
185}
186
187/// Expand `damaged` to the nearest reset boundaries on each side.
188/// A reset boundary is a row where pulldown-cmark's parser state is
189/// provably reset (see `ParsedBuffer::reset_boundaries`), so the
190/// returned range is provably equivalent to a fresh parse over the
191/// same slice — no post-slice verification needed in release.
192///
193/// `boundaries` must be sorted and contain `0` and `lines_len` as
194/// sentinels (every `ParsedBuffer::parse` ensures this). Returns
195/// `FullRebuild` if the expanded range trips either cap (same
196/// semantics as `widen_to_safe`).
197///
198/// This replaces the heuristic `widen_to_safe`-plus-structural-marker
199/// guard tower. The latter is kept available as a behavioural
200/// comparison source for one release cycle (per the openspec
201/// migration plan) before being deleted.
202pub fn expand_to_reset_boundary(
203    boundaries: &[usize],
204    lines_len: usize,
205    damaged: Range<usize>,
206) -> WidenResult {
207    if lines_len == 0 {
208        return WidenResult::FullRebuild;
209    }
210    debug_assert!(
211        damaged.start <= lines_len && damaged.end <= lines_len,
212        "expand_to_reset_boundary: damaged range {:?} out of bounds for lines_len = {}",
213        damaged,
214        lines_len,
215    );
216
217    // Greatest boundary <= damaged.start.
218    let start = boundaries
219        .iter()
220        .rev()
221        .find(|&&b| b <= damaged.start)
222        .copied()
223        .unwrap_or(0);
224    // Least boundary >= damaged.end. Sentinel `lines_len` is always
225    // present in a well-formed boundary set so the `unwrap_or` is
226    // unreachable; kept as a defensive fallback to avoid an inverted
227    // range if the invariant is ever violated.
228    let end = boundaries
229        .iter()
230        .find(|&&b| b >= damaged.end)
231        .copied()
232        .unwrap_or(lines_len);
233
234    let widened_len = end - start;
235    let cap_abs = MAX_INCREMENTAL_LINES;
236    // Same cap policy as widen_to_safe; see its docstring for the
237    // rationale on flooring `cap_frac` at `cap_abs`.
238    let cap_frac = (((lines_len as f32) * MAX_INCREMENTAL_FRACTION) as usize).max(cap_abs);
239    if widened_len > cap_abs || widened_len > cap_frac {
240        return WidenResult::FullRebuild;
241    }
242    WidenResult::Widened(start..end)
243}
244
245/// Widen `damaged` outward to safe construct boundaries, applying
246/// D5's +1 extra row and the D4 cap.
247///
248/// Returns `Widened(range)` when the widened range fits under the cap,
249/// or `FullRebuild` when the cap is exceeded or the buffer is empty.
250///
251/// Kept available for one release cycle as a behavioural comparison
252/// source against `expand_to_reset_boundary` (see openspec change
253/// `parse-reset-boundaries`). New call sites should use
254/// `expand_to_reset_boundary` instead.
255pub fn widen_to_safe(kinds: &[LineConstructKind], damaged: Range<usize>) -> WidenResult {
256    if kinds.is_empty() {
257        return WidenResult::FullRebuild;
258    }
259    debug_assert!(
260        damaged.start <= kinds.len() && damaged.end <= kinds.len(),
261        "widen_to_safe: damaged range {:?} out of bounds for kinds.len() = {}",
262        damaged,
263        kinds.len(),
264    );
265
266    let mut start = widen_up(kinds, damaged.start);
267    let mut end = widen_down(kinds, damaged.end);
268
269    // D5: widen one extra row on each side.
270    start = start.saturating_sub(1);
271    end = (end + 1).min(kinds.len());
272
273    let widened_len = end - start;
274    let cap_abs = MAX_INCREMENTAL_LINES;
275    // Fractional cap encodes the empirical "fresh full parse beats
276    // parse+splice" cross-over. It is only meaningful once full-parse
277    // cost is non-trivial; floor it at `cap_abs` so a 50%-widening on
278    // a tiny buffer (where both options are sub-millisecond) stays on
279    // the incremental path. Above `2 * cap_abs` lines the fractional
280    // cap dominates and catches large widenings the absolute cap
281    // would otherwise miss — this is the regime the previous `&&`
282    // operator left unguarded.
283    let cap_frac = (((kinds.len() as f32) * MAX_INCREMENTAL_FRACTION) as usize).max(cap_abs);
284    if widened_len > cap_abs || widened_len > cap_frac {
285        return WidenResult::FullRebuild;
286    }
287
288    WidenResult::Widened(start..end)
289}
290
291/// Derive fence-range half-open intervals from the per-line construct
292/// kinds. The view layer uses these to decide which logical rows
293/// render `force_raw` (no markdown re-styling, code-block fg color).
294///
295/// Half-open: a fence spanning rows `start..=end_inclusive` (both markers
296/// included) is returned as `start..end_inclusive + 1`. An unclosed
297/// fence runs to the end of the buffer.
298pub fn fence_ranges_from_kinds(kinds: &[LineConstructKind]) -> Vec<Range<usize>> {
299    let mut ranges = Vec::new();
300    let mut i = 0;
301    while i < kinds.len() {
302        if kinds[i] == LineConstructKind::FenceMarker {
303            let start = i;
304            i += 1;
305            while i < kinds.len() && kinds[i] == LineConstructKind::FenceContent {
306                i += 1;
307            }
308            if i < kinds.len() && kinds[i] == LineConstructKind::FenceMarker {
309                ranges.push(start..i + 1);
310                i += 1;
311            } else {
312                // Unclosed fence — extends to end of buffer.
313                ranges.push(start..kinds.len());
314            }
315        } else {
316            i += 1;
317        }
318    }
319    ranges
320}
321
322/// Line ranges of every code block (fenced AND indented) in the buffer,
323/// in ascending order. Reuses [`fence_ranges_from_kinds`] for fenced blocks
324/// (incl. unclosed-fence handling) and adds maximal `IndentedCode` runs.
325/// Used by the view to paint the code-box background.
326pub fn code_block_ranges_from_kinds(kinds: &[LineConstructKind]) -> Vec<Range<usize>> {
327    let mut ranges = fence_ranges_from_kinds(kinds);
328    let mut i = 0;
329    while i < kinds.len() {
330        if kinds[i] == LineConstructKind::IndentedCode {
331            let start = i;
332            while i < kinds.len() && kinds[i] == LineConstructKind::IndentedCode {
333                i += 1;
334            }
335            ranges.push(start..i);
336        } else {
337            i += 1;
338        }
339    }
340    // Fenced ranges are collected first then indented ones appended; sort so the
341    // combined list is ascending. Fenced and indented spans never overlap.
342    ranges.sort_by_key(|r| r.start);
343    ranges
344}
345
#[cfg(test)]
mod tests {
    use super::*;
    use crate::components::text_editor::markdown::ParsedBuffer;

    // ---- shared helpers -------------------------------------------------

    /// Owned buffer lines built from string literals.
    fn lines(strs: &[&str]) -> Vec<String> {
        strs.iter().map(|s| s.to_string()).collect()
    }

    /// Per-line kinds produced by a full parse of `strs`.
    fn kinds_of(strs: &[&str]) -> Vec<LineConstructKind> {
        let owned = lines(strs);
        ParsedBuffer::parse(&owned).kinds
    }

    /// Build a kinds vector from a compact spec, one char per line:
    /// P=Plain, B=Blank, F=FenceMarker, C=FenceContent, L=ListMarker,
    /// l=ListContinuation, Q=Blockquote(1), S=SetextUnderline, H=Heading,
    /// I=IndentedCode, X=HtmlBlock.
    fn kinds_str(s: &str) -> Vec<LineConstructKind> {
        s.chars()
            .map(|c| match c {
                'P' => LineConstructKind::Plain,
                'B' => LineConstructKind::Blank,
                'F' => LineConstructKind::FenceMarker,
                'C' => LineConstructKind::FenceContent,
                'L' => LineConstructKind::ListMarker,
                'l' => LineConstructKind::ListContinuation,
                'Q' => LineConstructKind::Blockquote(1),
                'S' => LineConstructKind::SetextUnderline,
                'H' => LineConstructKind::Heading,
                'I' => LineConstructKind::IndentedCode,
                'X' => LineConstructKind::HtmlBlock,
                _ => panic!("bad kind char {c}"),
            })
            .collect()
    }

    /// Unwrap a `WidenResult::Widened`, failing the calling test otherwise.
    #[track_caller]
    fn expect_widened(result: WidenResult) -> Range<usize> {
        match result {
            WidenResult::Widened(range) => range,
            other => panic!("expected Widened, got {other:?}"),
        }
    }

    // ---- line classification (via a full parse) -------------------------

    #[test]
    fn plain_paragraph() {
        assert_eq!(kinds_of(&["hello world"]), vec![LineConstructKind::Plain]);
    }

    #[test]
    fn blank_line() {
        assert_eq!(kinds_of(&[""]), vec![LineConstructKind::Blank]);
    }

    #[test]
    fn atx_heading() {
        assert_eq!(kinds_of(&["# title"]), vec![LineConstructKind::Heading]);
    }

    #[test]
    fn setext_underline_above_is_plain() {
        let k = kinds_of(&["title", "====="]);
        assert_eq!(
            k,
            vec![LineConstructKind::Plain, LineConstructKind::SetextUnderline]
        );
    }

    #[test]
    fn fence_pair() {
        let k = kinds_of(&["```rust", "let x = 1;", "```"]);
        assert_eq!(
            k,
            vec![
                LineConstructKind::FenceMarker,
                LineConstructKind::FenceContent,
                LineConstructKind::FenceMarker,
            ]
        );
    }

    #[test]
    fn list_marker_and_continuation() {
        let k = kinds_of(&["- item", "  continuation"]);
        assert_eq!(
            k,
            vec![
                LineConstructKind::ListMarker,
                LineConstructKind::ListContinuation
            ]
        );
    }

    #[test]
    fn blockquote_levels() {
        let k = kinds_of(&[">> two"]);
        assert_eq!(k, vec![LineConstructKind::Blockquote(2)]);
    }

    #[test]
    fn indented_code() {
        let k = kinds_of(&["", "    let x = 1;"]);
        assert_eq!(k[1], LineConstructKind::IndentedCode);
    }

    #[test]
    fn html_block() {
        let k = kinds_of(&["<div>", "body", "</div>"]);
        assert!(matches!(k[0], LineConstructKind::HtmlBlock));
    }

    #[test]
    fn inline_html_inside_paragraph_does_not_become_html_block() {
        // Regression: `Event::InlineHtml` previously painted the
        // paragraph row as HtmlBlock, defeating safe-boundary widening
        // for any paragraph containing inline HTML like `<br>` or
        // `<span>`.
        let k = kinds_of(&["hello <br> world"]);
        assert_eq!(
            k[0],
            LineConstructKind::Plain,
            "paragraph with inline HTML must stay Plain"
        );
        let k = kinds_of(&["see <span>x</span> end"]);
        assert_eq!(k[0], LineConstructKind::Plain);
    }

    // ---- compute_damage_range -------------------------------------------

    #[test]
    fn damage_single_char_insert_uses_cursor_hint() {
        let old = lines(&["hello", "world"]);
        let new = lines(&["hello", "worldx"]);
        assert_eq!(compute_damage_range(&old, &new, 1), Some(1..2));
    }

    #[test]
    fn damage_no_change_returns_none() {
        let old = lines(&["a", "b"]);
        assert_eq!(compute_damage_range(&old, &old, 0), None);
    }

    #[test]
    fn damage_enter_at_line_end_uses_lcp_lcs() {
        // Row 1 is split in two: line counts differ, so the slow path runs.
        let old = lines(&["alpha", "beta"]);
        let new = lines(&["alpha", "be", "ta"]);
        // LCP=1, LCS=0 → damaged = [1..3)
        assert_eq!(compute_damage_range(&old, &new, 1), Some(1..3));
    }

    #[test]
    fn damage_backspace_merging_lines() {
        let old = lines(&["alpha", "beta", "gamma"]);
        let new = lines(&["alphabeta", "gamma"]);
        // LCP=0, LCS=1 → damaged = [0..1)
        assert_eq!(compute_damage_range(&old, &new, 0), Some(0..1));
    }

    #[test]
    fn damage_pure_deletion_yields_empty_range() {
        // Removing a whole row leaves nothing damaged in `new` coordinates:
        // LCP=1, LCS=1 → damaged = [1..1).
        let old = lines(&["a", "b", "c"]);
        let new = lines(&["a", "c"]);
        assert_eq!(compute_damage_range(&old, &new, 1), Some(1..1));
    }

    #[test]
    fn damage_multi_diff_within_window_falls_through_to_slow_path() {
        // Two rows differ, both within CURSOR_HINT_WINDOW of the cursor.
        // Fast path's other-diff-in-window check trips → LCP/LCS slow path.
        let old = lines(&["a", "b", "c", "d", "e"]);
        let mut new = old.clone();
        new[1] = "B".to_string();
        new[2] = "C".to_string();
        // Cursor at row 1; the window covers rows 0..=4 (full buffer here).
        let dmg = compute_damage_range(&old, &new, 1).unwrap();
        // Slow path: LCP=1, LCS=2 → 1..3
        assert_eq!(dmg, 1..3);
    }

    // ---- widen_to_safe ----------------------------------------------------

    #[test]
    fn widen_plain_paragraph_to_blank_boundaries() {
        // P B P P P B P — damage row 3. The walk stops at the nearest
        // Plain/Blank neighbour on each side and D5 adds one more row, so
        // the result must cover the blank rows 1 and 5 (or be wider).
        let k = kinds_str("PBPPPBP");
        let r = expect_widened(widen_to_safe(&k, 3..4));
        assert!(r.start <= 1, "widen.start <= 1, got {}", r.start);
        assert!(r.end >= 6, "widen.end >= 6, got {}", r.end);
    }

    #[test]
    fn widen_fence_interior_includes_both_markers() {
        // P B F C C C F B P — damage row 4 (inside fence) → widen
        // to include both fence markers + one extra line on each side.
        let k = kinds_str("PBFCCCFBP");
        let r = expect_widened(widen_to_safe(&k, 4..5));
        assert!(
            r.start <= 2,
            "must include opening fence marker at row 2, got start {}",
            r.start
        );
        assert!(
            r.end >= 7,
            "must include closing fence marker at row 6 (end >= 7), got end {}",
            r.end
        );
    }

    #[test]
    fn widen_list_continuation_reaches_outermost_marker() {
        // L l L l l l B P — damage at row 4 (nested continuation) → widen
        // up to outermost ListMarker at row 0.
        let k = kinds_str("LlLlllBP");
        let r = expect_widened(widen_to_safe(&k, 4..5));
        assert_eq!(r.start, 0, "must reach col-0 list marker");
    }

    #[test]
    fn widen_setext_underline_includes_text_line_above() {
        // P S P — damage at row 1 (underline) → widen to include row 0
        // (heading text line).
        let k = kinds_str("PSP");
        let r = expect_widened(widen_to_safe(&k, 1..2));
        assert_eq!(r.start, 0, "must include row above setext underline");
    }

    #[test]
    fn widen_html_block_includes_whole_block() {
        // P X X X B P — damage at row 2 (middle of HTML) → widen to
        // include all HtmlBlock rows.
        let k = kinds_str("PXXXBP");
        let r = expect_widened(widen_to_safe(&k, 2..3));
        assert!(
            r.start <= 1,
            "must include first HtmlBlock row, got start {}",
            r.start
        );
        assert!(
            r.end >= 4,
            "must include last HtmlBlock row, got end {}",
            r.end
        );
    }

    #[test]
    fn widen_blockquote_includes_whole_block() {
        // P Q Q Q B P — damage in the middle of a blockquote → widen
        // to include the whole blockquote.
        let k = kinds_str("PQQQBP");
        let r = expect_widened(widen_to_safe(&k, 2..3));
        assert!(
            r.start <= 1,
            "must include first Blockquote row, got start {}",
            r.start
        );
        assert!(
            r.end >= 4,
            "must include last Blockquote row, got end {}",
            r.end
        );
    }

    #[test]
    fn widen_multi_list_does_not_over_pull_across_blank() {
        // Two independent lists separated by a blank line. Damage in
        // the second list must not pull the first list into the slice.
        let k = kinds_str("LlBLll");
        let r = expect_widened(widen_to_safe(&k, 4..5));
        // The blank at row 2 is the separator. Widening must stop there
        // (or at the row above, after D5 +1).
        assert!(
            r.start >= 1,
            "widen.start must be >= 1 (D5 may pull past Blank by one row), got {}",
            r.start
        );
        assert!(
            r.start <= 2,
            "widen.start must not pull in list A, got {}",
            r.start
        );
    }

    #[test]
    fn widen_exceeds_cap_returns_full_rebuild() {
        // 300-line all-FenceContent buffer; the damage is one line;
        // widening tries to reach the fence ends but the buffer is
        // uniformly fence content, so widening goes to 0..300, which
        // exceeds MAX_INCREMENTAL_LINES (256).
        let k = vec![LineConstructKind::FenceContent; 300];
        assert_eq!(widen_to_safe(&k, 150..151), WidenResult::FullRebuild);
    }

    #[test]
    fn widen_trips_when_fractional_cap_exceeds_absolute() {
        // Regression: cap-trip used `&&` instead of `||`, so on a buffer
        // big enough that `cap_frac > cap_abs` (kinds.len() > 512), a
        // widened range between the two thresholds slipped through.
        // 600-line buffer of FenceContent → cap_abs=256, cap_frac=300.
        // Widening covers the whole buffer (no safe boundaries), so
        // widened_len=600 must trip the fallback.
        let k = vec![LineConstructKind::FenceContent; 600];
        assert_eq!(widen_to_safe(&k, 300..301), WidenResult::FullRebuild);
    }

    #[test]
    fn widen_at_buffer_start_clamps_to_zero() {
        let k = kinds_str("PPPPP");
        let r = expect_widened(widen_to_safe(&k, 0..1));
        assert_eq!(r.start, 0);
    }

    #[test]
    fn widen_at_buffer_end_clamps_to_len() {
        let k = kinds_str("PPPPP");
        let r = expect_widened(widen_to_safe(&k, 4..5));
        assert_eq!(r.end, 5);
    }

    // ---- reset boundaries -------------------------------------------------

    #[test]
    fn parse_records_boundaries_for_blank_separated_paragraphs() {
        // Realistic markdown layout: each paragraph followed by a
        // blank line. Pulldown ends each Paragraph; depth drops to
        // 0 at the following blank row. The boundary set should
        // contain every blank row.
        let mut buffer: Vec<String> = Vec::with_capacity(8);
        for i in 0..4 {
            buffer.push(format!("paragraph {i}"));
            buffer.push(String::new());
        }
        let pb = ParsedBuffer::parse(&buffer);
        // Expected: 0, then every Blank row (1, 3, 5, 7), then
        // buffer.len() (8). The blank at row 7 == buffer.len()-1 may or
        // may not be present depending on whether depth==0 was reached at
        // that row; check the interior at least.
        assert!(pb.reset_boundaries.contains(&0), "sentinel 0 missing");
        assert!(
            pb.reset_boundaries.contains(&buffer.len()),
            "sentinel lines.len() missing"
        );
        assert!(
            pb.reset_boundaries.contains(&1),
            "blank after paragraph 0 should be a boundary, got {:?}",
            pb.reset_boundaries
        );
        assert!(
            pb.reset_boundaries.contains(&3),
            "blank after paragraph 1 should be a boundary, got {:?}",
            pb.reset_boundaries
        );
    }

    #[test]
    fn expand_to_reset_uses_nearest_sentinels() {
        // Only sentinels [0, 5] in the boundary set — every edit
        // expands to the full buffer.
        let boundaries = vec![0, 5];
        let r = expect_widened(expand_to_reset_boundary(&boundaries, 5, 2..3));
        assert_eq!(r, 0..5);
    }

    #[test]
    fn expand_to_reset_snaps_to_interior_boundaries() {
        // Boundaries at rows 0, 3, 6, 10 (e.g. blank-separated
        // blocks). Damage at row 4 expands to 3..6.
        let boundaries = vec![0, 3, 6, 10];
        let r = expect_widened(expand_to_reset_boundary(&boundaries, 10, 4..5));
        assert_eq!(r, 3..6);
    }

    #[test]
    fn expand_to_reset_damage_at_exact_boundary_is_zero_span() {
        // Damage range coincides with a boundary point. The function
        // returns the smallest enclosing boundary pair.
        let boundaries = vec![0, 3, 6, 10];
        // damaged.start == damaged.end == 6. Expands to 6..6 (empty).
        let r = expect_widened(expand_to_reset_boundary(&boundaries, 10, 6..6));
        assert_eq!(r, 6..6);
    }

    #[test]
    fn expand_to_reset_empty_buffer_falls_back() {
        let boundaries = vec![0];
        assert_eq!(
            expand_to_reset_boundary(&boundaries, 0, 0..0),
            WidenResult::FullRebuild
        );
    }

    #[test]
    fn expand_to_reset_caps_trip_fallback() {
        // 600-row buffer, no interior boundaries. Damage at 300
        // expands to 0..600 which exceeds cap_abs (256) and cap_frac
        // (300, floored at cap_abs).
        let boundaries = vec![0, 600];
        assert_eq!(
            expand_to_reset_boundary(&boundaries, 600, 300..301),
            WidenResult::FullRebuild
        );
    }

    // ---- fence / code-block ranges ----------------------------------------

    #[test]
    fn fence_ranges_single_fence() {
        // P F C C F P — fence covers rows 1..5 (half-open: both markers + content).
        let k = kinds_str("PFCCFP");
        let r = fence_ranges_from_kinds(&k);
        assert_eq!(r, vec![1..5]);
    }

    #[test]
    fn fence_ranges_two_fences() {
        // F C F P F C F — two fences at 0..3 and 4..7.
        let k = kinds_str("FCFPFCF");
        let r = fence_ranges_from_kinds(&k);
        assert_eq!(r, vec![0..3, 4..7]);
    }

    #[test]
    fn fence_ranges_unclosed_extends_to_end() {
        // P F C C C — unclosed fence runs to end of buffer.
        let k = kinds_str("PFCCC");
        let r = fence_ranges_from_kinds(&k);
        assert_eq!(r, vec![1..5]);
    }

    #[test]
    fn fence_ranges_empty() {
        assert!(fence_ranges_from_kinds(&[]).is_empty());
    }

    #[test]
    fn code_block_ranges_covers_fenced_and_indented() {
        // Fenced block then a blank then an indented code block.
        let k = kinds_of(&[
            "```",          // FenceMarker
            "let x = 1;",   // FenceContent
            "```",          // FenceMarker
            "",             // Blank
            "    indented", // IndentedCode
            "    code",     // IndentedCode
        ]);
        let r = code_block_ranges_from_kinds(&k);
        assert_eq!(r, vec![0..3, 4..6]);
    }

    // ---- diagnostics --------------------------------------------------------

    #[test]
    fn investigate_list_fence_indented_code_interaction() {
        // Diagnostic only — prints, asserts nothing about the kinds. Run
        // with `--nocapture` to see the output.
        //
        // Initial: row 7 "    a" is after "- a" (row 1) with 5 blank lines in between.
        // After editing row 9 (blank → space inside fence), fresh parse changes row 7.
        let initial = lines(&[
            "",      // 0: Blank
            "- a",   // 1: ListMarker
            "",      // 2: Blank
            "",      // 3: Blank
            "",      // 4: Blank
            "",      // 5: Blank
            "",      // 6: Blank
            "    a", // 7: ? - before fence
            "```",   // 8: FenceMarker
            "",      // 9: FenceContent -> edit to " "
            "",      // 10: FenceContent
            "",      // 11: FenceContent
            "",      // 12: FenceContent
            "",      // 13: FenceContent
            "",      // 14: FenceContent
            "",      // 15: FenceContent
            "",      // 16: FenceContent
            "> a",   // 17: FenceContent
            "",      // 18: FenceContent
            ">  ",   // 19: FenceContent
            "",      // 20: FenceContent
            "",      // 21: FenceContent
            "",      // 22: FenceContent (last row → FenceMarker?)
        ]);
        let initial_pb = ParsedBuffer::parse(&initial);
        eprintln!("initial kinds: {:?}", &initial_pb.kinds);

        let mut edited = initial.clone();
        edited[9].push(' ');
        let edited_pb = ParsedBuffer::parse(&edited);
        eprintln!("edited  kinds: {:?}", &edited_pb.kinds);

        // Report every row whose kind changed, to see where divergence starts.
        for i in 0..initial.len() {
            if initial_pb.kinds[i] != edited_pb.kinds[i] {
                eprintln!(
                    "Row {} differs: initial={:?}, edited={:?}",
                    i, initial_pb.kinds[i], edited_pb.kinds[i]
                );
            }
        }
    }
}
kimun_notes/components/text_editor/parse_incremental.rs

kimun_notes/components/text_editor/
parse_incremental.rs