kimun_notes/components/text_editor/
parse_incremental.rs

1#![allow(dead_code)]
2//! Incremental-parse machinery: line-construct classification cache,
3//! damage-diff against the previous buffer snapshot, safe-boundary
4//! widening, and fence-range derivation. Pure functions only — no
5//! `pulldown_cmark` calls (those live in `markdown.rs`).
6
7use std::ops::Range;
8
/// Per-line structural category used when widening a damaged range out
/// to safe construct boundaries.
///
/// A line counts as a *safe boundary* when re-parsing a slice that ends
/// on it yields the same result as the matching slice of a full-buffer
/// parse. Only `Blank` and `Plain` are accepted as boundaries by the
/// widening helpers in this module (the design intent being that their
/// neighbour is also `Blank`/`Plain`, or the edge of the buffer). Every
/// structural kind (`FenceMarker`, `ListMarker`, ...) is NEVER a
/// boundary: widening has to continue until it reaches the outer
/// terminator of the construct the line belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineConstructKind {
    /// Empty line.
    Blank,
    /// Ordinary paragraph text.
    Plain,
    /// Opening or closing line of a fenced code block.
    FenceMarker,
    /// Line between two fence markers.
    FenceContent,
    /// Line of an indented code block.
    IndentedCode,
    /// Line that starts a list item.
    ListMarker,
    /// Follow-on line belonging to a list item.
    ListContinuation,
    /// Blockquote line; the payload is the quote nesting depth.
    Blockquote(u8),
    /// Underline row of a setext heading.
    SetextUnderline,
    /// Line of an HTML block.
    HtmlBlock,
    /// ATX heading line.
    Heading,
}
31
/// Outcome of widening a damaged row range out to safe boundaries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WidenResult {
    /// The widened row range. The caller hands it to
    /// `ParsedBuffer::parse_range`.
    Widened(Range<usize>),
    /// No cheap widening exists (a cap tripped, or the construct is
    /// unbounded). The caller falls back to `ParsedBuffer::parse(lines)`.
    FullRebuild,
}
41
42/// Maximum fraction of buffer the widened range may cover before we
43/// abandon incremental and fall back to a full parse. Half the buffer
44/// is the empirical cross-over where parse+splice overhead exceeds a
45/// fresh full parse on the same input.
46pub(super) const MAX_INCREMENTAL_FRACTION: f32 = 0.5;
47
48/// Absolute cap on the widened range. Independent of buffer size; keeps
49/// large-fence edits bounded even on small buffers.
50pub(super) const MAX_INCREMENTAL_LINES: usize = 256;
51
52/// Cursor-row hint scan window for `compute_damage_range`. Empirically
53/// covers single-character edits, IME composition of up to 3 graphemes,
54/// and one Enter at line end. Multi-line pastes intentionally fall
55/// through to the LCP/LCS slow path.
56pub(super) const CURSOR_HINT_WINDOW: usize = 4;
57
58/// Compute the row range that differs between `old` and `new`, with a
59/// cursor-row hint to accelerate the common single-character-edit case.
60///
61/// **Contract:** `cursor_row` must be the row that was actually edited
62/// (the editor's cursor position after the keystroke). The fast path
63/// trusts this — if `cursor_row` does not identify the real edit point,
64/// the function may under-report the damaged range for an edit shape
65/// that single-keystroke editing cannot produce. Distant simultaneous
66/// edits are out of scope; they can only happen via programmatic
67/// buffer replacement, which goes through `set_text` and bumps
68/// `text_revision` such that the LCP/LCS slow path is taken naturally
69/// (the cursor row's content will match between old and new, so the
70/// fast path declines and the slow path runs).
71///
72/// Returns `None` when the buffers are byte-identical (defensive
73/// guard — callers should already have gated on `text_revision`).
74///
75/// Fast path: same line count, the row at `cursor_row` differs, and
76/// no other line in `±CURSOR_HINT_WINDOW` differs. Returns
77/// `Some(cursor_row..cursor_row + 1)`. O(`CURSOR_HINT_WINDOW`).
78///
79/// Slow path: longest common prefix (LCP) and longest common suffix
80/// (LCS); damaged range is the middle slice. O(min(buffer_size,
81/// damage_size)).
82pub fn compute_damage_range(
83    old: &[String],
84    new: &[String],
85    cursor_row: usize,
86) -> Option<Range<usize>> {
87    if old == new {
88        return None;
89    }
90
91    // Fast path: same line count, cursor row differs, no other diff in window.
92    if old.len() == new.len() && cursor_row < old.len() && old[cursor_row] != new[cursor_row] {
93        let lo = cursor_row.saturating_sub(CURSOR_HINT_WINDOW);
94        let hi = (cursor_row + CURSOR_HINT_WINDOW + 1).min(old.len());
95        let other_diff_in_window = (lo..hi).any(|i| i != cursor_row && old[i] != new[i]);
96        if !other_diff_in_window {
97            return Some(cursor_row..cursor_row + 1);
98        }
99    }
100
101    // Slow path: longest common prefix + suffix. O(buffer_len)
102    // String equalities; each compare is a length check + at most one
103    // SIMD memcmp on the first-differing byte. ~14µs on a 5000-line
104    // buffer for a single-row backspace.
105    //
106    // A cursor-anchored bound was explored as perf #12 and rejected:
107    //  - Capping the scan at `cursor_row + slack` saves nothing,
108    //    because the scan naturally stops at the first-differing
109    //    row, which IS `cursor_row` for keystroke-driven edits.
110    //  - Starting the LCP scan at `cursor_row - slack` (trusting
111    //    rows above to be unchanged) would skip the prefix scan but
112    //    introduces silent miscompilation risk on edits whose actual
113    //    diff is far from the cursor (paste, undo, programmatic
114    //    edit) — the post-slice verify only checks rows WITHIN the
115    //    widened range, so a misidentified damage range outside
116    //    that range is not caught.
117    //  - Maintaining per-row hashes alongside `lines_snapshot` would
118    //    let us replace string compares with u64 compares, but
119    //    requires plumbing damage hints from the editor's edit
120    //    surface to view.update for incremental hash maintenance —
121    //    bigger change than the 10µs win justifies.
122    //
123    // Until per-row hashes ship as part of a broader edit-surface
124    // refactor, the full O(buffer) scan stays.
125    let lcp = old
126        .iter()
127        .zip(new.iter())
128        .take_while(|(a, b)| a == b)
129        .count();
130    let lcs = old
131        .iter()
132        .rev()
133        .zip(new.iter().rev())
134        .take_while(|(a, b)| a == b)
135        .count();
136    // Guard against overlap when both buffers share a long common stretch.
137    // Clamp lcs so the resulting range is non-empty and start <= end.
138    let new_end = new.len().saturating_sub(lcs);
139    let old_end = old.len().saturating_sub(lcs);
140    let start = lcp.min(new_end).min(old_end);
141    let end = new_end.max(start);
142    Some(start..end)
143}
144
145/// Return true when `kind` is a self-contained, safe boundary line.
146/// Blank lines and ordinary paragraph lines are safe; everything else
147/// belongs to a multi-line construct that widening must include in
148/// full.
149fn is_safe_boundary(kind: LineConstructKind) -> bool {
150    matches!(kind, LineConstructKind::Blank | LineConstructKind::Plain)
151}
152
153/// Walk upward from `damaged_start` (the first damaged row) until the
154/// row just above is a safe boundary. Returns the new start row
155/// (inclusive).
156///
157/// `ListMarker` and `ListContinuation` are non-safe, so the walk
158/// passes through them automatically — landing on the safe row above
159/// the outermost list (Blank, or Plain that is not a continuation),
160/// which is the G1-required outermost-list-ancestor stopping point.
161fn widen_up(kinds: &[LineConstructKind], damaged_start: usize) -> usize {
162    let mut row = damaged_start;
163    while row > 0 {
164        let candidate = row - 1;
165        if is_safe_boundary(kinds[candidate]) {
166            return candidate;
167        }
168        row = candidate;
169    }
170    0
171}
172
173/// Walk downward from `damaged.end` (the first row past the damage)
174/// until we land on a safe boundary or end of buffer. Returns the
175/// exclusive end index.
176fn widen_down(kinds: &[LineConstructKind], damaged_end: usize) -> usize {
177    let mut row = damaged_end;
178    while row < kinds.len() {
179        if is_safe_boundary(kinds[row]) {
180            return row + 1;
181        }
182        row += 1;
183    }
184    kinds.len()
185}
186
187/// Expand `damaged` to the nearest reset boundaries on each side.
188/// A reset boundary is a row where pulldown-cmark's parser state is
189/// provably reset (see `ParsedBuffer::reset_boundaries`), so the
190/// returned range is provably equivalent to a fresh parse over the
191/// same slice — no post-slice verification needed in release.
192///
193/// `boundaries` must be sorted and contain `0` and `lines_len` as
194/// sentinels (every `ParsedBuffer::parse` ensures this). Returns
195/// `FullRebuild` if the expanded range trips either cap (same
196/// semantics as `widen_to_safe`).
197///
198/// This replaces the heuristic `widen_to_safe`-plus-structural-marker
199/// guard tower. The latter is kept available as a behavioural
200/// comparison source for one release cycle (per the openspec
201/// migration plan) before being deleted.
202pub fn expand_to_reset_boundary(
203    boundaries: &[usize],
204    lines_len: usize,
205    damaged: Range<usize>,
206) -> WidenResult {
207    if lines_len == 0 {
208        return WidenResult::FullRebuild;
209    }
210    debug_assert!(
211        damaged.start <= lines_len && damaged.end <= lines_len,
212        "expand_to_reset_boundary: damaged range {:?} out of bounds for lines_len = {}",
213        damaged,
214        lines_len,
215    );
216
217    // Greatest boundary <= damaged.start.
218    let start = boundaries
219        .iter()
220        .rev()
221        .find(|&&b| b <= damaged.start)
222        .copied()
223        .unwrap_or(0);
224    // Least boundary >= damaged.end. Sentinel `lines_len` is always
225    // present in a well-formed boundary set so the `unwrap_or` is
226    // unreachable; kept as a defensive fallback to avoid an inverted
227    // range if the invariant is ever violated.
228    let end = boundaries
229        .iter()
230        .find(|&&b| b >= damaged.end)
231        .copied()
232        .unwrap_or(lines_len);
233
234    let widened_len = end - start;
235    let cap_abs = MAX_INCREMENTAL_LINES;
236    // Same cap policy as widen_to_safe; see its docstring for the
237    // rationale on flooring `cap_frac` at `cap_abs`.
238    let cap_frac = (((lines_len as f32) * MAX_INCREMENTAL_FRACTION) as usize).max(cap_abs);
239    if widened_len > cap_abs || widened_len > cap_frac {
240        return WidenResult::FullRebuild;
241    }
242    WidenResult::Widened(start..end)
243}
244
245/// Widen `damaged` outward to safe construct boundaries, applying
246/// D5's +1 extra row and the D4 cap.
247///
248/// Returns `Widened(range)` when the widened range fits under the cap,
249/// or `FullRebuild` when the cap is exceeded or the buffer is empty.
250///
251/// Kept available for one release cycle as a behavioural comparison
252/// source against `expand_to_reset_boundary` (see openspec change
253/// `parse-reset-boundaries`). New call sites should use
254/// `expand_to_reset_boundary` instead.
255pub fn widen_to_safe(kinds: &[LineConstructKind], damaged: Range<usize>) -> WidenResult {
256    if kinds.is_empty() {
257        return WidenResult::FullRebuild;
258    }
259    debug_assert!(
260        damaged.start <= kinds.len() && damaged.end <= kinds.len(),
261        "widen_to_safe: damaged range {:?} out of bounds for kinds.len() = {}",
262        damaged,
263        kinds.len(),
264    );
265
266    let mut start = widen_up(kinds, damaged.start);
267    let mut end = widen_down(kinds, damaged.end);
268
269    // D5: widen one extra row on each side.
270    start = start.saturating_sub(1);
271    end = (end + 1).min(kinds.len());
272
273    let widened_len = end - start;
274    let cap_abs = MAX_INCREMENTAL_LINES;
275    // Fractional cap encodes the empirical "fresh full parse beats
276    // parse+splice" cross-over. It is only meaningful once full-parse
277    // cost is non-trivial; floor it at `cap_abs` so a 50%-widening on
278    // a tiny buffer (where both options are sub-millisecond) stays on
279    // the incremental path. Above `2 * cap_abs` lines the fractional
280    // cap dominates and catches large widenings the absolute cap
281    // would otherwise miss — this is the regime the previous `&&`
282    // operator left unguarded.
283    let cap_frac = (((kinds.len() as f32) * MAX_INCREMENTAL_FRACTION) as usize).max(cap_abs);
284    if widened_len > cap_abs || widened_len > cap_frac {
285        return WidenResult::FullRebuild;
286    }
287
288    WidenResult::Widened(start..end)
289}
290
291/// Derive fence-range half-open intervals from the per-line construct
292/// kinds. The view layer uses these to decide which logical rows
293/// render `force_raw` (no markdown re-styling, code-block fg color).
294///
295/// Half-open: a fence spanning rows `start..=end_inclusive` (both markers
296/// included) is returned as `start..end_inclusive + 1`. An unclosed
297/// fence runs to the end of the buffer.
298pub fn fence_ranges_from_kinds(kinds: &[LineConstructKind]) -> Vec<Range<usize>> {
299    let mut ranges = Vec::new();
300    let mut i = 0;
301    while i < kinds.len() {
302        if kinds[i] == LineConstructKind::FenceMarker {
303            let start = i;
304            i += 1;
305            while i < kinds.len() && kinds[i] == LineConstructKind::FenceContent {
306                i += 1;
307            }
308            if i < kinds.len() && kinds[i] == LineConstructKind::FenceMarker {
309                ranges.push(start..i + 1);
310                i += 1;
311            } else {
312                // Unclosed fence — extends to end of buffer.
313                ranges.push(start..kinds.len());
314            }
315        } else {
316            i += 1;
317        }
318    }
319    ranges
320}
321
#[cfg(test)]
mod tests {
    use super::*;
    use crate::components::text_editor::markdown::ParsedBuffer;

    /// Run a full parse over `lines` and return the per-line kinds.
    fn kinds_of(lines: &[&str]) -> Vec<LineConstructKind> {
        let owned: Vec<String> = lines.iter().map(|s| s.to_string()).collect();
        ParsedBuffer::parse(&owned).kinds
    }

    #[test]
    fn plain_paragraph() {
        assert_eq!(kinds_of(&["hello world"]), vec![LineConstructKind::Plain]);
    }

    #[test]
    fn blank_line() {
        assert_eq!(kinds_of(&[""]), vec![LineConstructKind::Blank]);
    }

    #[test]
    fn atx_heading() {
        assert_eq!(kinds_of(&["# title"]), vec![LineConstructKind::Heading]);
    }

    #[test]
    fn setext_underline_above_is_plain() {
        let k = kinds_of(&["title", "====="]);
        assert_eq!(
            k,
            vec![LineConstructKind::Plain, LineConstructKind::SetextUnderline]
        );
    }

    #[test]
    fn fence_pair() {
        let k = kinds_of(&["```rust", "let x = 1;", "```"]);
        assert_eq!(
            k,
            vec![
                LineConstructKind::FenceMarker,
                LineConstructKind::FenceContent,
                LineConstructKind::FenceMarker,
            ]
        );
    }

    #[test]
    fn list_marker_and_continuation() {
        let k = kinds_of(&["- item", "  continuation"]);
        assert_eq!(
            k,
            vec![
                LineConstructKind::ListMarker,
                LineConstructKind::ListContinuation
            ]
        );
    }

    #[test]
    fn blockquote_levels() {
        let k = kinds_of(&[">> two"]);
        assert_eq!(k, vec![LineConstructKind::Blockquote(2)]);
    }

    #[test]
    fn indented_code() {
        let k = kinds_of(&["", "    let x = 1;"]);
        assert_eq!(k[1], LineConstructKind::IndentedCode);
    }

    #[test]
    fn html_block() {
        let k = kinds_of(&["<div>", "body", "</div>"]);
        assert!(matches!(k[0], LineConstructKind::HtmlBlock));
    }

    #[test]
    fn inline_html_inside_paragraph_does_not_become_html_block() {
        // Regression: `Event::InlineHtml` previously painted the
        // paragraph row as HtmlBlock, defeating safe-boundary widening
        // for any paragraph containing inline HTML like `<br>` or
        // `<span>`.
        let k = kinds_of(&["hello <br> world"]);
        assert_eq!(
            k[0],
            LineConstructKind::Plain,
            "paragraph with inline HTML must stay Plain"
        );
        let k = kinds_of(&["see <span>x</span> end"]);
        assert_eq!(k[0], LineConstructKind::Plain);
    }

    /// Build an owned line buffer from string literals.
    fn lines(strs: &[&str]) -> Vec<String> {
        strs.iter().map(|s| s.to_string()).collect()
    }

    #[test]
    fn damage_single_char_insert_uses_cursor_hint() {
        let old = lines(&["hello", "world"]);
        let new = lines(&["hello", "worldx"]);
        assert_eq!(compute_damage_range(&old, &new, 1), Some(1..2));
    }

    #[test]
    fn damage_no_change_returns_none() {
        let old = lines(&["a", "b"]);
        assert_eq!(compute_damage_range(&old, &old, 0), None);
    }

    #[test]
    fn damage_enter_at_line_end_uses_lcp_lcs() {
        let old = lines(&["alpha", "beta"]);
        let new = lines(&["alpha", "be", "ta"]);
        let dmg = compute_damage_range(&old, &new, 1).unwrap();
        assert_eq!(dmg.start, 1);
        assert_eq!(dmg.end, new.len()); // damaged = [1..3)
    }

    #[test]
    fn damage_backspace_merging_lines() {
        let old = lines(&["alpha", "beta", "gamma"]);
        let new = lines(&["alphabeta", "gamma"]);
        let dmg = compute_damage_range(&old, &new, 0).unwrap();
        // LCP=0, LCS=1 ("gamma") → only the merged row 0 of `new` is damaged.
        assert_eq!(dmg, 0..1);
    }

    #[test]
    fn damage_multi_diff_within_window_falls_through_to_slow_path() {
        // Two rows differ, both within CURSOR_HINT_WINDOW of the cursor.
        // Fast path's other-diff-in-window check trips → LCP/LCS slow path.
        let old = lines(&["a", "b", "c", "d", "e"]);
        let mut new = old.clone();
        new[1] = "B".to_string();
        new[2] = "C".to_string();
        // Cursor at row 1; the window covers rows 0..=4 (full buffer here).
        let dmg = compute_damage_range(&old, &new, 1).unwrap();
        // Slow path: LCP=1, LCS=2 → 1..3
        assert_eq!(dmg, 1..3);
    }

    #[test]
    fn damage_deleting_duplicate_line_yields_empty_range() {
        // LCP and LCS both match the single surviving "a", so prefix and
        // suffix overlap. The clamp must produce a valid (empty) range
        // in `new` coordinates rather than an inverted one.
        let old = lines(&["a", "a"]);
        let new = lines(&["a"]);
        assert_eq!(compute_damage_range(&old, &new, 0), Some(0..0));
    }

    #[test]
    fn damage_inserting_duplicate_line_covers_one_row() {
        // Mirror of the deletion case: overlap is clamped against the
        // shorter (old) buffer and one row of `new` is reported.
        let old = lines(&["a"]);
        let new = lines(&["a", "a"]);
        assert_eq!(compute_damage_range(&old, &new, 1), Some(0..1));
    }

    /// Build a kinds vector from a compact spec: one char per line.
    fn kinds_str(s: &str) -> Vec<LineConstructKind> {
        // P=Plain, B=Blank, F=FenceMarker, C=FenceContent,
        // L=ListMarker, l=ListContinuation, Q=Blockquote(1),
        // S=SetextUnderline, H=Heading, I=IndentedCode, X=HtmlBlock.
        s.chars()
            .map(|c| match c {
                'P' => LineConstructKind::Plain,
                'B' => LineConstructKind::Blank,
                'F' => LineConstructKind::FenceMarker,
                'C' => LineConstructKind::FenceContent,
                'L' => LineConstructKind::ListMarker,
                'l' => LineConstructKind::ListContinuation,
                'Q' => LineConstructKind::Blockquote(1),
                'S' => LineConstructKind::SetextUnderline,
                'H' => LineConstructKind::Heading,
                'I' => LineConstructKind::IndentedCode,
                'X' => LineConstructKind::HtmlBlock,
                _ => panic!("bad kind char {c}"),
            })
            .collect()
    }

    #[test]
    fn widen_plain_paragraph_to_blank_boundaries() {
        // P B P P P B P — damage row 3 → widen to blank rows 1 and 5
        // (plus the D5 +1 each side: 0 and 6 — but the buffer ends are
        // also boundaries; clamp).
        let k = kinds_str("PBPPPBP");
        match widen_to_safe(&k, 3..4) {
            WidenResult::Widened(r) => {
                // Must include the blank rows at 1 and 5 (or wider).
                assert!(r.start <= 1, "widen.start <= 1, got {}", r.start);
                assert!(r.end >= 6, "widen.end >= 6, got {}", r.end);
            }
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn widen_fence_interior_includes_both_markers() {
        // P B F C C C F B P — damage row 4 (inside fence) → widen
        // to include both fence markers + one extra line on each side.
        let k = kinds_str("PBFCCCFBP");
        match widen_to_safe(&k, 4..5) {
            WidenResult::Widened(r) => {
                assert!(
                    r.start <= 2,
                    "must include opening fence marker at row 2, got start {}",
                    r.start
                );
                assert!(
                    r.end >= 7,
                    "must include closing fence marker at row 6 (end >= 7), got end {}",
                    r.end
                );
            }
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn widen_list_continuation_reaches_outermost_marker() {
        // L l L l l l B P — damage at row 4 (nested continuation) → widen
        // up to outermost ListMarker at row 0.
        let k = kinds_str("LlLlllBP");
        match widen_to_safe(&k, 4..5) {
            WidenResult::Widened(r) => assert_eq!(r.start, 0, "must reach col-0 list marker"),
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn widen_setext_underline_includes_text_line_above() {
        // P S P — damage at row 1 (underline) → widen to include row 0
        // (heading text line).
        let k = kinds_str("PSP");
        match widen_to_safe(&k, 1..2) {
            WidenResult::Widened(r) => {
                assert_eq!(r.start, 0, "must include row above setext underline")
            }
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn widen_html_block_includes_whole_block() {
        // P X X X B P — damage at row 2 (middle of HTML) → widen to
        // include all HtmlBlock rows.
        let k = kinds_str("PXXXBP");
        match widen_to_safe(&k, 2..3) {
            WidenResult::Widened(r) => {
                assert!(
                    r.start <= 1,
                    "must include first HtmlBlock row, got start {}",
                    r.start
                );
                assert!(
                    r.end >= 4,
                    "must include last HtmlBlock row, got end {}",
                    r.end
                );
            }
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn widen_exceeds_cap_returns_full_rebuild() {
        // 300-line all-FenceContent buffer; the damage is one line;
        // widening tries to reach the fence ends but the buffer is
        // uniformly fence content, so widening goes to 0..300, which
        // exceeds MAX_INCREMENTAL_LINES (256).
        let k = vec![LineConstructKind::FenceContent; 300];
        assert_eq!(widen_to_safe(&k, 150..151), WidenResult::FullRebuild);
    }

    #[test]
    fn widen_trips_when_fractional_cap_exceeds_absolute() {
        // Regression: cap-trip used `&&` instead of `||`, so on a buffer
        // big enough that `cap_frac > cap_abs` (kinds.len() > 512), a
        // widened range between the two thresholds slipped through.
        // 600-line buffer of FenceContent → cap_abs=256, cap_frac=300.
        // Widening covers the whole buffer (no safe boundaries), so
        // widened_len=600 must trip the fallback.
        let k = vec![LineConstructKind::FenceContent; 600];
        assert_eq!(widen_to_safe(&k, 300..301), WidenResult::FullRebuild);
    }

    #[test]
    fn widen_at_buffer_start_clamps_to_zero() {
        let k = kinds_str("PPPPP");
        match widen_to_safe(&k, 0..1) {
            WidenResult::Widened(r) => assert_eq!(r.start, 0),
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn widen_at_buffer_end_clamps_to_len() {
        let k = kinds_str("PPPPP");
        match widen_to_safe(&k, 4..5) {
            WidenResult::Widened(r) => assert_eq!(r.end, 5),
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn parse_records_boundaries_for_blank_separated_paragraphs() {
        // Realistic markdown layout: each paragraph followed by a
        // blank line. Pulldown ends each Paragraph; depth drops to
        // 0 at the following blank row. The boundary set should
        // contain every blank row.
        let mut buffer: Vec<String> = Vec::with_capacity(8);
        for i in 0..4 {
            buffer.push(format!("paragraph {i}"));
            buffer.push(String::new());
        }
        let pb = ParsedBuffer::parse(&buffer);
        // Expected: 0, then every Blank row (1, 3, 5, 7), then buffer.len() (8).
        // The blank at row 7 == buffer.len()-1 may or may not be
        // present depending on whether depth==0 was reached at that
        // row; check the interior at least.
        assert!(pb.reset_boundaries.contains(&0), "sentinel 0 missing");
        assert!(
            pb.reset_boundaries.contains(&buffer.len()),
            "sentinel lines.len() missing"
        );
        assert!(
            pb.reset_boundaries.contains(&1),
            "blank after paragraph 0 should be a boundary, got {:?}",
            pb.reset_boundaries
        );
        assert!(
            pb.reset_boundaries.contains(&3),
            "blank after paragraph 1 should be a boundary, got {:?}",
            pb.reset_boundaries
        );
    }

    #[test]
    fn expand_to_reset_uses_nearest_sentinels() {
        // Only sentinels [0, 5] in the boundary set — every edit
        // expands to the full buffer.
        let boundaries = vec![0, 5];
        match expand_to_reset_boundary(&boundaries, 5, 2..3) {
            WidenResult::Widened(r) => assert_eq!(r, 0..5),
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn expand_to_reset_snaps_to_interior_boundaries() {
        // Boundaries at rows 0, 3, 6, 10 (e.g. blank-separated
        // blocks). Damage at row 4 expands to 3..6.
        let boundaries = vec![0, 3, 6, 10];
        match expand_to_reset_boundary(&boundaries, 10, 4..5) {
            WidenResult::Widened(r) => assert_eq!(r, 3..6),
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn expand_to_reset_damage_at_exact_boundary_is_zero_span() {
        // Damage range coincides with a boundary point. The function
        // returns the smallest enclosing boundary pair.
        let boundaries = vec![0, 3, 6, 10];
        // damaged.start == damaged.end == 6. Expands to 6..6 (empty).
        match expand_to_reset_boundary(&boundaries, 10, 6..6) {
            WidenResult::Widened(r) => assert_eq!(r, 6..6),
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn expand_to_reset_empty_buffer_falls_back() {
        let boundaries = vec![0];
        assert_eq!(
            expand_to_reset_boundary(&boundaries, 0, 0..0),
            WidenResult::FullRebuild
        );
    }

    #[test]
    fn expand_to_reset_caps_trip_fallback() {
        // 600-row buffer, no interior boundaries. Damage at 300
        // expands to 0..600 which exceeds cap_abs (256) and cap_frac
        // (300, floored at cap_abs).
        let boundaries = vec![0, 600];
        assert_eq!(
            expand_to_reset_boundary(&boundaries, 600, 300..301),
            WidenResult::FullRebuild
        );
    }

    #[test]
    fn expand_to_reset_cap_edge_is_inclusive() {
        // A widened range of exactly MAX_INCREMENTAL_LINES rows stays
        // incremental; one row more falls back to a full rebuild.
        let cap = MAX_INCREMENTAL_LINES;
        let at_cap = vec![0, cap, 600];
        assert_eq!(
            expand_to_reset_boundary(&at_cap, 600, 10..11),
            WidenResult::Widened(0..cap)
        );
        let over_cap = vec![0, cap + 1, 600];
        assert_eq!(
            expand_to_reset_boundary(&over_cap, 600, 10..11),
            WidenResult::FullRebuild
        );
    }

    #[test]
    fn widen_blockquote_includes_whole_block() {
        // P Q Q Q B P — damage in the middle of a blockquote → widen
        // to include the whole blockquote.
        let k = kinds_str("PQQQBP");
        match widen_to_safe(&k, 2..3) {
            WidenResult::Widened(r) => {
                assert!(
                    r.start <= 1,
                    "must include first Blockquote row, got start {}",
                    r.start
                );
                assert!(
                    r.end >= 4,
                    "must include last Blockquote row, got end {}",
                    r.end
                );
            }
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn widen_multi_list_does_not_over_pull_across_blank() {
        // Two independent lists separated by a blank line. Damage in
        // the second list must not pull the first list into the slice.
        let k = kinds_str("LlBLll");
        match widen_to_safe(&k, 4..5) {
            WidenResult::Widened(r) => {
                // The blank at row 2 is the separator. Widening must
                // stop there (or at the row above, after D5 +1).
                assert!(
                    r.start >= 1,
                    "widen.start must be >= 1 (D5 may pull past Blank by one row), got {}",
                    r.start
                );
                assert!(
                    r.start <= 2,
                    "widen.start must not pull in list A, got {}",
                    r.start
                );
            }
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    #[test]
    fn fence_ranges_single_fence() {
        // P F C C F P — fence covers rows 1..5 (half-open: both markers + content).
        let k = kinds_str("PFCCFP");
        let r = fence_ranges_from_kinds(&k);
        assert_eq!(r, vec![1..5]);
    }

    #[test]
    fn fence_ranges_two_fences() {
        // F C F P F C F — two fences at 0..3 and 4..7.
        let k = kinds_str("FCFPFCF");
        let r = fence_ranges_from_kinds(&k);
        assert_eq!(r, vec![0..3, 4..7]);
    }

    #[test]
    fn fence_ranges_unclosed_extends_to_end() {
        // P F C C C — unclosed fence runs to end of buffer.
        let k = kinds_str("PFCCC");
        let r = fence_ranges_from_kinds(&k);
        assert_eq!(r, vec![1..5]);
    }

    #[test]
    fn fence_ranges_empty() {
        assert!(fence_ranges_from_kinds(&[]).is_empty());
    }

    #[test]
    fn investigate_list_fence_indented_code_interaction() {
        // Diagnostic only — this test makes no assertions. It prints the
        // kinds of two full parses so the list / fence / indented-code
        // interaction can be inspected with `--nocapture`.
        //
        // Initial: row 7 "    a" is after "- a" (row 1) with 5 blank lines in between.
        // After editing row 9 (blank → space inside fence), fresh parse changes row 7.
        let initial: Vec<String> = vec![
            "".to_string(),      // 0: Blank
            "- a".to_string(),   // 1: ListMarker
            "".to_string(),      // 2: Blank
            "".to_string(),      // 3: Blank
            "".to_string(),      // 4: Blank
            "".to_string(),      // 5: Blank
            "".to_string(),      // 6: Blank
            "    a".to_string(), // 7: ? - before fence
            "```".to_string(),   // 8: FenceMarker
            "".to_string(),      // 9: FenceContent -> edit to " "
            "".to_string(),      // 10: FenceContent
            "".to_string(),      // 11: FenceContent
            "".to_string(),      // 12: FenceContent
            "".to_string(),      // 13: FenceContent
            "".to_string(),      // 14: FenceContent
            "".to_string(),      // 15: FenceContent
            "".to_string(),      // 16: FenceContent
            "> a".to_string(),   // 17: FenceContent
            "".to_string(),      // 18: FenceContent
            ">  ".to_string(),   // 19: FenceContent
            "".to_string(),      // 20: FenceContent
            "".to_string(),      // 21: FenceContent
            "".to_string(),      // 22: FenceContent (last row → FenceMarker?)
        ];
        let initial_pb = ParsedBuffer::parse(&initial);
        eprintln!("initial kinds: {:?}", &initial_pb.kinds);

        let mut edited = initial.clone();
        edited[9].push(' ');
        let edited_pb = ParsedBuffer::parse(&edited);
        eprintln!("edited  kinds: {:?}", &edited_pb.kinds);

        // Report every row whose kind differs between the two parses.
        // Iterating the pairs (instead of indexing a fixed 0..23) cannot
        // go out of bounds if the fixture above changes length.
        for (i, (before, after)) in initial_pb
            .kinds
            .iter()
            .zip(edited_pb.kinds.iter())
            .enumerate()
        {
            if before != after {
                eprintln!(
                    "Row {} differs: initial={:?}, edited={:?}",
                    i, before, after
                );
            }
        }
    }
}
kimun_notes/components/text_editor/parse_incremental.rs

kimun_notes/components/text_editor/
parse_incremental.rs