kimun_notes/components/text_editor/parse_incremental.rs
1#![allow(dead_code)]
2//! Incremental-parse machinery: line-construct classification cache,
3//! damage-diff against the previous buffer snapshot, safe-boundary
4//! widening, and fence-range derivation. Pure functions only — no
5//! `pulldown_cmark` calls (those live in `markdown.rs`).
6
7use std::ops::Range;
8
/// Coarse classification of a buffer line for safe-boundary widening.
///
/// A line is a *safe boundary* when re-parsing a slice ending on that
/// line is equivalent to the corresponding slice of a full-buffer parse.
/// `Blank` and `Plain` are unconditional boundaries when their neighbour
/// is also `Blank`/`Plain` or end-of-buffer. Structural markers
/// (`FenceMarker`, `ListMarker`, etc.) are NEVER boundaries — widening
/// must reach the outer terminator of whatever construct they belong to.
///
/// NOTE(review): the `is_safe_boundary` helper in this file tests only the
/// line's own kind and does not inspect neighbours — confirm whether the
/// neighbour condition above is enforced elsewhere.
///
/// The examples on the variants below are taken from this module's tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineConstructKind {
    /// An empty line (`""`).
    Blank,
    /// Ordinary paragraph text, e.g. `hello world`. A paragraph that
    /// contains inline HTML stays `Plain`, and so does the text line
    /// above a setext underline.
    Plain,
    /// Opening or closing delimiter line of a fenced code block.
    FenceMarker,
    /// Line between the delimiters of a fenced code block.
    FenceContent,
    /// Line of an indented code block.
    IndentedCode,
    /// Line that starts a list item, e.g. `- item`.
    ListMarker,
    /// Follow-on line that belongs to a list item.
    ListContinuation,
    /// Blockquote line; the payload counts the `>` nesting levels
    /// (`>> two` classifies as `Blockquote(2)`).
    Blockquote(u8),
    /// Underline row of a setext heading, e.g. `=====`.
    SetextUnderline,
    /// Line of a block-level HTML construct, e.g. a leading `<div>` line.
    HtmlBlock,
    /// ATX heading line, e.g. `# title`.
    Heading,
}
31
/// Result of widening a damaged range to safe construct boundaries.
///
/// Produced by both `widen_to_safe` and `expand_to_reset_boundary`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WidenResult {
    /// Widened range; caller passes this to `ParsedBuffer::parse_range`.
    /// The range is half-open and expressed in buffer rows.
    Widened(Range<usize>),
    /// Range cannot be cheaply widened (cap trip, unbounded construct).
    /// Also returned when the buffer is empty.
    /// Caller falls back to `ParsedBuffer::parse(lines)`.
    FullRebuild,
}
41
/// Maximum fraction of buffer the widened range may cover before we
/// abandon incremental and fall back to a full parse. Half the buffer
/// is the empirical cross-over where parse+splice overhead exceeds a
/// fresh full parse on the same input.
///
/// NOTE(review): both users in this file (`widen_to_safe` and
/// `expand_to_reset_boundary`) floor the derived line count at
/// `MAX_INCREMENTAL_LINES` and also test the absolute cap, so the
/// absolute cap is always at least as strict as this one.
pub(super) const MAX_INCREMENTAL_FRACTION: f32 = 0.5;

/// Absolute cap on the widened range, in rows. Independent of buffer
/// size; keeps large-fence edits bounded even on small buffers.
pub(super) const MAX_INCREMENTAL_LINES: usize = 256;

/// Cursor-row hint scan window for `compute_damage_range`: the number of
/// rows on each side of the cursor row that the fast path checks for
/// other differences. The fast path only applies when the line count is
/// unchanged, so it covers in-line edits such as single-character
/// insertions and deletions. Edits that change the line count (Enter,
/// line-merging Backspace, multi-line pastes) intentionally fall
/// through to the LCP/LCS slow path.
pub(super) const CURSOR_HINT_WINDOW: usize = 4;
57
58/// Compute the row range that differs between `old` and `new`, with a
59/// cursor-row hint to accelerate the common single-character-edit case.
60///
61/// **Contract:** `cursor_row` must be the row that was actually edited
62/// (the editor's cursor position after the keystroke). The fast path
63/// trusts this — if `cursor_row` does not identify the real edit point,
64/// the function may under-report the damaged range for an edit shape
65/// that single-keystroke editing cannot produce. Distant simultaneous
66/// edits are out of scope; they can only happen via programmatic
67/// buffer replacement, which goes through `set_text` and bumps
68/// `text_revision` such that the LCP/LCS slow path is taken naturally
69/// (the cursor row's content will match between old and new, so the
70/// fast path declines and the slow path runs).
71///
72/// Returns `None` when the buffers are byte-identical (defensive
73/// guard — callers should already have gated on `text_revision`).
74///
75/// Fast path: same line count, the row at `cursor_row` differs, and
76/// no other line in `±CURSOR_HINT_WINDOW` differs. Returns
77/// `Some(cursor_row..cursor_row + 1)`. O(`CURSOR_HINT_WINDOW`).
78///
79/// Slow path: longest common prefix (LCP) and longest common suffix
80/// (LCS); damaged range is the middle slice. O(min(buffer_size,
81/// damage_size)).
82pub fn compute_damage_range(
83 old: &[String],
84 new: &[String],
85 cursor_row: usize,
86) -> Option<Range<usize>> {
87 if old == new {
88 return None;
89 }
90
91 // Fast path: same line count, cursor row differs, no other diff in window.
92 if old.len() == new.len() && cursor_row < old.len() && old[cursor_row] != new[cursor_row] {
93 let lo = cursor_row.saturating_sub(CURSOR_HINT_WINDOW);
94 let hi = (cursor_row + CURSOR_HINT_WINDOW + 1).min(old.len());
95 let other_diff_in_window = (lo..hi).any(|i| i != cursor_row && old[i] != new[i]);
96 if !other_diff_in_window {
97 return Some(cursor_row..cursor_row + 1);
98 }
99 }
100
101 // Slow path: longest common prefix + suffix. O(buffer_len)
102 // String equalities; each compare is a length check + at most one
103 // SIMD memcmp on the first-differing byte. ~14µs on a 5000-line
104 // buffer for a single-row backspace.
105 //
106 // A cursor-anchored bound was explored as perf #12 and rejected:
107 // - Capping the scan at `cursor_row + slack` saves nothing,
108 // because the scan naturally stops at the first-differing
109 // row, which IS `cursor_row` for keystroke-driven edits.
110 // - Starting the LCP scan at `cursor_row - slack` (trusting
111 // rows above to be unchanged) would skip the prefix scan but
112 // introduces silent miscompilation risk on edits whose actual
113 // diff is far from the cursor (paste, undo, programmatic
114 // edit) — the post-slice verify only checks rows WITHIN the
115 // widened range, so a misidentified damage range outside
116 // that range is not caught.
117 // - Maintaining per-row hashes alongside `lines_snapshot` would
118 // let us replace string compares with u64 compares, but
119 // requires plumbing damage hints from the editor's edit
120 // surface to view.update for incremental hash maintenance —
121 // bigger change than the 10µs win justifies.
122 //
123 // Until per-row hashes ship as part of a broader edit-surface
124 // refactor, the full O(buffer) scan stays.
125 let lcp = old
126 .iter()
127 .zip(new.iter())
128 .take_while(|(a, b)| a == b)
129 .count();
130 let lcs = old
131 .iter()
132 .rev()
133 .zip(new.iter().rev())
134 .take_while(|(a, b)| a == b)
135 .count();
136 // Guard against overlap when both buffers share a long common stretch.
137 // Clamp lcs so the resulting range is non-empty and start <= end.
138 let new_end = new.len().saturating_sub(lcs);
139 let old_end = old.len().saturating_sub(lcs);
140 let start = lcp.min(new_end).min(old_end);
141 let end = new_end.max(start);
142 Some(start..end)
143}
144
145/// Return true when `kind` is a self-contained, safe boundary line.
146/// Blank lines and ordinary paragraph lines are safe; everything else
147/// belongs to a multi-line construct that widening must include in
148/// full.
149fn is_safe_boundary(kind: LineConstructKind) -> bool {
150 matches!(kind, LineConstructKind::Blank | LineConstructKind::Plain)
151}
152
153/// Walk upward from `damaged_start` (the first damaged row) until the
154/// row just above is a safe boundary. Returns the new start row
155/// (inclusive).
156///
157/// `ListMarker` and `ListContinuation` are non-safe, so the walk
158/// passes through them automatically — landing on the safe row above
159/// the outermost list (Blank, or Plain that is not a continuation),
160/// which is the G1-required outermost-list-ancestor stopping point.
161fn widen_up(kinds: &[LineConstructKind], damaged_start: usize) -> usize {
162 let mut row = damaged_start;
163 while row > 0 {
164 let candidate = row - 1;
165 if is_safe_boundary(kinds[candidate]) {
166 return candidate;
167 }
168 row = candidate;
169 }
170 0
171}
172
173/// Walk downward from `damaged.end` (the first row past the damage)
174/// until we land on a safe boundary or end of buffer. Returns the
175/// exclusive end index.
176fn widen_down(kinds: &[LineConstructKind], damaged_end: usize) -> usize {
177 let mut row = damaged_end;
178 while row < kinds.len() {
179 if is_safe_boundary(kinds[row]) {
180 return row + 1;
181 }
182 row += 1;
183 }
184 kinds.len()
185}
186
187/// Expand `damaged` to the nearest reset boundaries on each side.
188/// A reset boundary is a row where pulldown-cmark's parser state is
189/// provably reset (see `ParsedBuffer::reset_boundaries`), so the
190/// returned range is provably equivalent to a fresh parse over the
191/// same slice — no post-slice verification needed in release.
192///
193/// `boundaries` must be sorted and contain `0` and `lines_len` as
194/// sentinels (every `ParsedBuffer::parse` ensures this). Returns
195/// `FullRebuild` if the expanded range trips either cap (same
196/// semantics as `widen_to_safe`).
197///
198/// This replaces the heuristic `widen_to_safe`-plus-structural-marker
199/// guard tower. The latter is kept available as a behavioural
200/// comparison source for one release cycle (per the openspec
201/// migration plan) before being deleted.
202pub fn expand_to_reset_boundary(
203 boundaries: &[usize],
204 lines_len: usize,
205 damaged: Range<usize>,
206) -> WidenResult {
207 if lines_len == 0 {
208 return WidenResult::FullRebuild;
209 }
210 debug_assert!(
211 damaged.start <= lines_len && damaged.end <= lines_len,
212 "expand_to_reset_boundary: damaged range {:?} out of bounds for lines_len = {}",
213 damaged,
214 lines_len,
215 );
216
217 // Greatest boundary <= damaged.start.
218 let start = boundaries
219 .iter()
220 .rev()
221 .find(|&&b| b <= damaged.start)
222 .copied()
223 .unwrap_or(0);
224 // Least boundary >= damaged.end. Sentinel `lines_len` is always
225 // present in a well-formed boundary set so the `unwrap_or` is
226 // unreachable; kept as a defensive fallback to avoid an inverted
227 // range if the invariant is ever violated.
228 let end = boundaries
229 .iter()
230 .find(|&&b| b >= damaged.end)
231 .copied()
232 .unwrap_or(lines_len);
233
234 let widened_len = end - start;
235 let cap_abs = MAX_INCREMENTAL_LINES;
236 // Same cap policy as widen_to_safe; see its docstring for the
237 // rationale on flooring `cap_frac` at `cap_abs`.
238 let cap_frac = (((lines_len as f32) * MAX_INCREMENTAL_FRACTION) as usize).max(cap_abs);
239 if widened_len > cap_abs || widened_len > cap_frac {
240 return WidenResult::FullRebuild;
241 }
242 WidenResult::Widened(start..end)
243}
244
245/// Widen `damaged` outward to safe construct boundaries, applying
246/// D5's +1 extra row and the D4 cap.
247///
248/// Returns `Widened(range)` when the widened range fits under the cap,
249/// or `FullRebuild` when the cap is exceeded or the buffer is empty.
250///
251/// Kept available for one release cycle as a behavioural comparison
252/// source against `expand_to_reset_boundary` (see openspec change
253/// `parse-reset-boundaries`). New call sites should use
254/// `expand_to_reset_boundary` instead.
255pub fn widen_to_safe(kinds: &[LineConstructKind], damaged: Range<usize>) -> WidenResult {
256 if kinds.is_empty() {
257 return WidenResult::FullRebuild;
258 }
259 debug_assert!(
260 damaged.start <= kinds.len() && damaged.end <= kinds.len(),
261 "widen_to_safe: damaged range {:?} out of bounds for kinds.len() = {}",
262 damaged,
263 kinds.len(),
264 );
265
266 let mut start = widen_up(kinds, damaged.start);
267 let mut end = widen_down(kinds, damaged.end);
268
269 // D5: widen one extra row on each side.
270 start = start.saturating_sub(1);
271 end = (end + 1).min(kinds.len());
272
273 let widened_len = end - start;
274 let cap_abs = MAX_INCREMENTAL_LINES;
275 // Fractional cap encodes the empirical "fresh full parse beats
276 // parse+splice" cross-over. It is only meaningful once full-parse
277 // cost is non-trivial; floor it at `cap_abs` so a 50%-widening on
278 // a tiny buffer (where both options are sub-millisecond) stays on
279 // the incremental path. Above `2 * cap_abs` lines the fractional
280 // cap dominates and catches large widenings the absolute cap
281 // would otherwise miss — this is the regime the previous `&&`
282 // operator left unguarded.
283 let cap_frac = (((kinds.len() as f32) * MAX_INCREMENTAL_FRACTION) as usize).max(cap_abs);
284 if widened_len > cap_abs || widened_len > cap_frac {
285 return WidenResult::FullRebuild;
286 }
287
288 WidenResult::Widened(start..end)
289}
290
291/// Derive fence-range half-open intervals from the per-line construct
292/// kinds. The view layer uses these to decide which logical rows
293/// render `force_raw` (no markdown re-styling, code-block fg color).
294///
295/// Half-open: a fence spanning rows `start..=end_inclusive` (both markers
296/// included) is returned as `start..end_inclusive + 1`. An unclosed
297/// fence runs to the end of the buffer.
298pub fn fence_ranges_from_kinds(kinds: &[LineConstructKind]) -> Vec<Range<usize>> {
299 let mut ranges = Vec::new();
300 let mut i = 0;
301 while i < kinds.len() {
302 if kinds[i] == LineConstructKind::FenceMarker {
303 let start = i;
304 i += 1;
305 while i < kinds.len() && kinds[i] == LineConstructKind::FenceContent {
306 i += 1;
307 }
308 if i < kinds.len() && kinds[i] == LineConstructKind::FenceMarker {
309 ranges.push(start..i + 1);
310 i += 1;
311 } else {
312 // Unclosed fence — extends to end of buffer.
313 ranges.push(start..kinds.len());
314 }
315 } else {
316 i += 1;
317 }
318 }
319 ranges
320}
321
#[cfg(test)]
mod tests {
    use super::*;
    use crate::components::text_editor::markdown::ParsedBuffer;

    // ---- helpers ------------------------------------------------------

    /// Build an owned line buffer from string literals.
    fn lines(strs: &[&str]) -> Vec<String> {
        strs.iter().map(|s| s.to_string()).collect()
    }

    /// Run a full parse over `strs` and return the per-line kinds.
    fn kinds_of(strs: &[&str]) -> Vec<LineConstructKind> {
        ParsedBuffer::parse(&lines(strs)).kinds
    }

    /// Build a kinds vector from a compact spec, one char per line.
    fn kinds_str(s: &str) -> Vec<LineConstructKind> {
        // P=Plain, B=Blank, F=FenceMarker, C=FenceContent,
        // L=ListMarker, l=ListContinuation, Q=Blockquote(1),
        // S=SetextUnderline, H=Heading, I=IndentedCode, X=HtmlBlock.
        s.chars()
            .map(|c| match c {
                'P' => LineConstructKind::Plain,
                'B' => LineConstructKind::Blank,
                'F' => LineConstructKind::FenceMarker,
                'C' => LineConstructKind::FenceContent,
                'L' => LineConstructKind::ListMarker,
                'l' => LineConstructKind::ListContinuation,
                'Q' => LineConstructKind::Blockquote(1),
                'S' => LineConstructKind::SetextUnderline,
                'H' => LineConstructKind::Heading,
                'I' => LineConstructKind::IndentedCode,
                'X' => LineConstructKind::HtmlBlock,
                _ => panic!("bad kind char {c}"),
            })
            .collect()
    }

    /// Unwrap a `Widened` range, failing the calling test on anything else.
    #[track_caller]
    fn expect_widened(result: WidenResult) -> std::ops::Range<usize> {
        match result {
            WidenResult::Widened(r) => r,
            x => panic!("expected Widened, got {x:?}"),
        }
    }

    // ---- line classification (via ParsedBuffer::parse) ------------------

    #[test]
    fn plain_paragraph() {
        assert_eq!(kinds_of(&["hello world"]), vec![LineConstructKind::Plain]);
    }

    #[test]
    fn blank_line() {
        assert_eq!(kinds_of(&[""]), vec![LineConstructKind::Blank]);
    }

    #[test]
    fn atx_heading() {
        assert_eq!(kinds_of(&["# title"]), vec![LineConstructKind::Heading]);
    }

    #[test]
    fn setext_underline_above_is_plain() {
        let k = kinds_of(&["title", "====="]);
        assert_eq!(
            k,
            vec![LineConstructKind::Plain, LineConstructKind::SetextUnderline]
        );
    }

    #[test]
    fn fence_pair() {
        let k = kinds_of(&["```rust", "let x = 1;", "```"]);
        assert_eq!(
            k,
            vec![
                LineConstructKind::FenceMarker,
                LineConstructKind::FenceContent,
                LineConstructKind::FenceMarker,
            ]
        );
    }

    #[test]
    fn list_marker_and_continuation() {
        let k = kinds_of(&["- item", " continuation"]);
        assert_eq!(
            k,
            vec![
                LineConstructKind::ListMarker,
                LineConstructKind::ListContinuation
            ]
        );
    }

    #[test]
    fn blockquote_levels() {
        let k = kinds_of(&[">> two"]);
        assert_eq!(k, vec![LineConstructKind::Blockquote(2)]);
    }

    #[test]
    fn indented_code() {
        // An indented code block needs at least four leading spaces; with
        // fewer the line is an ordinary paragraph.
        let k = kinds_of(&["", "    let x = 1;"]);
        assert_eq!(k[1], LineConstructKind::IndentedCode);
    }

    #[test]
    fn html_block() {
        let k = kinds_of(&["<div>", "body", "</div>"]);
        assert_eq!(k[0], LineConstructKind::HtmlBlock);
    }

    #[test]
    fn inline_html_inside_paragraph_does_not_become_html_block() {
        // Regression: `Event::InlineHtml` previously painted the
        // paragraph row as HtmlBlock, defeating safe-boundary widening
        // for any paragraph containing inline HTML like `<br>` or
        // `<span>`.
        let k = kinds_of(&["hello <br> world"]);
        assert_eq!(
            k[0],
            LineConstructKind::Plain,
            "paragraph with inline HTML must stay Plain"
        );
        let k = kinds_of(&["see <span>x</span> end"]);
        assert_eq!(k[0], LineConstructKind::Plain);
    }

    // ---- compute_damage_range -----------------------------------------

    #[test]
    fn damage_single_char_insert_uses_cursor_hint() {
        let old = lines(&["hello", "world"]);
        let new = lines(&["hello", "worldx"]);
        assert_eq!(compute_damage_range(&old, &new, 1), Some(1..2));
    }

    #[test]
    fn damage_no_change_returns_none() {
        let old = lines(&["a", "b"]);
        assert_eq!(compute_damage_range(&old, &old, 0), None);
    }

    #[test]
    fn damage_enter_at_line_end_uses_lcp_lcs() {
        // Line count changes, so the fast path declines.
        let old = lines(&["alpha", "beta"]);
        let new = lines(&["alpha", "be", "ta"]);
        assert_eq!(compute_damage_range(&old, &new, 1), Some(1..3));
    }

    #[test]
    fn damage_backspace_merging_lines() {
        // LCP=0, LCS=1 ("gamma") → only the merged row is damaged.
        let old = lines(&["alpha", "beta", "gamma"]);
        let new = lines(&["alphabeta", "gamma"]);
        assert_eq!(compute_damage_range(&old, &new, 0), Some(0..1));
    }

    #[test]
    fn damage_multi_diff_within_window_falls_through_to_slow_path() {
        // Two rows differ, both within CURSOR_HINT_WINDOW of the cursor.
        // Fast path's other-diff-in-window check trips → LCP/LCS slow path.
        let old = lines(&["a", "b", "c", "d", "e"]);
        let mut new = old.clone();
        new[1] = "B".to_string();
        new[2] = "C".to_string();
        // Cursor at row 1; the window covers rows 0..=4 (full buffer here).
        let dmg = compute_damage_range(&old, &new, 1).unwrap();
        // Slow path: LCP=1, LCS=2 → 1..3
        assert_eq!(dmg, 1..3);
    }

    // ---- widen_to_safe --------------------------------------------------

    #[test]
    fn widen_plain_paragraph_to_blank_boundaries() {
        // P B P P P B P — damage row 3. Widening stops at the adjacent
        // Plain rows 2 and 4; the D5 +1 on each side then pulls in the
        // Blank rows 1 and 5.
        let r = expect_widened(widen_to_safe(&kinds_str("PBPPPBP"), 3..4));
        // Must include the blank rows at 1 and 5 (or wider).
        assert!(r.start <= 1, "widen.start <= 1, got {}", r.start);
        assert!(r.end >= 6, "widen.end >= 6, got {}", r.end);
    }

    #[test]
    fn widen_fence_interior_includes_both_markers() {
        // P B F C C C F B P — damage row 4 (inside fence) → widen
        // to include both fence markers + one extra line on each side.
        let r = expect_widened(widen_to_safe(&kinds_str("PBFCCCFBP"), 4..5));
        assert!(
            r.start <= 2,
            "must include opening fence marker at row 2, got start {}",
            r.start
        );
        assert!(
            r.end >= 7,
            "must include closing fence marker at row 6 (end >= 7), got end {}",
            r.end
        );
    }

    #[test]
    fn widen_list_continuation_reaches_outermost_marker() {
        // L l L l l l B P — damage at row 4 (nested continuation) → widen
        // up to outermost ListMarker at row 0.
        let r = expect_widened(widen_to_safe(&kinds_str("LlLlllBP"), 4..5));
        assert_eq!(r.start, 0, "must reach col-0 list marker");
    }

    #[test]
    fn widen_setext_underline_includes_text_line_above() {
        // P S P — damage at row 1 (underline) → widen to include row 0
        // (heading text line).
        let r = expect_widened(widen_to_safe(&kinds_str("PSP"), 1..2));
        assert_eq!(r.start, 0, "must include row above setext underline");
    }

    #[test]
    fn widen_html_block_includes_whole_block() {
        // P X X X B P — damage at row 2 (middle of HTML) → widen to
        // include all HtmlBlock rows.
        let r = expect_widened(widen_to_safe(&kinds_str("PXXXBP"), 2..3));
        assert!(
            r.start <= 1,
            "must include first HtmlBlock row, got start {}",
            r.start
        );
        assert!(
            r.end >= 4,
            "must include last HtmlBlock row, got end {}",
            r.end
        );
    }

    #[test]
    fn widen_blockquote_includes_whole_block() {
        // P Q Q Q B P — damage in the middle of a blockquote → widen
        // to include the whole blockquote.
        let r = expect_widened(widen_to_safe(&kinds_str("PQQQBP"), 2..3));
        assert!(
            r.start <= 1,
            "must include first Blockquote row, got start {}",
            r.start
        );
        assert!(
            r.end >= 4,
            "must include last Blockquote row, got end {}",
            r.end
        );
    }

    #[test]
    fn widen_multi_list_does_not_over_pull_across_blank() {
        // Two independent lists separated by a blank line. Damage in
        // the second list must not pull the first list into the slice.
        let r = expect_widened(widen_to_safe(&kinds_str("LlBLll"), 4..5));
        // The blank at row 2 is the separator. Widening must
        // stop there (or at the row above, after D5 +1).
        assert!(
            r.start >= 1,
            "widen.start must be >= 1 (D5 may pull past Blank by one row), got {}",
            r.start
        );
        assert!(
            r.start <= 2,
            "widen.start must not pull in list A, got {}",
            r.start
        );
    }

    #[test]
    fn widen_exceeds_cap_returns_full_rebuild() {
        // 300-line all-FenceContent buffer; the damage is one line;
        // widening tries to reach the fence ends but the buffer is
        // uniformly fence content, so widening goes to 0..300, which
        // exceeds MAX_INCREMENTAL_LINES (256).
        let k = vec![LineConstructKind::FenceContent; 300];
        assert_eq!(widen_to_safe(&k, 150..151), WidenResult::FullRebuild);
    }

    #[test]
    fn widen_trips_when_fractional_cap_exceeds_absolute() {
        // Regression: cap-trip used `&&` instead of `||`, so on a buffer
        // big enough that `cap_frac > cap_abs` (kinds.len() > 512), a
        // widened range between the two thresholds slipped through.
        // 600-line buffer of FenceContent → cap_abs=256, cap_frac=300.
        // Widening covers the whole buffer (no safe boundaries), so
        // widened_len=600 must trip the fallback.
        let k = vec![LineConstructKind::FenceContent; 600];
        assert_eq!(widen_to_safe(&k, 300..301), WidenResult::FullRebuild);
    }

    #[test]
    fn widen_at_buffer_start_clamps_to_zero() {
        let r = expect_widened(widen_to_safe(&kinds_str("PPPPP"), 0..1));
        assert_eq!(r.start, 0);
    }

    #[test]
    fn widen_at_buffer_end_clamps_to_len() {
        let r = expect_widened(widen_to_safe(&kinds_str("PPPPP"), 4..5));
        assert_eq!(r.end, 5);
    }

    // ---- reset boundaries -----------------------------------------------

    #[test]
    fn parse_records_boundaries_for_blank_separated_paragraphs() {
        // Realistic markdown layout: each paragraph followed by a
        // blank line. Pulldown ends each Paragraph; depth drops to
        // 0 at the following blank row. The boundary set should
        // contain every blank row.
        let mut buffer: Vec<String> = Vec::with_capacity(8);
        for i in 0..4 {
            buffer.push(format!("paragraph {i}"));
            buffer.push(String::new());
        }
        let pb = ParsedBuffer::parse(&buffer);
        // Expected: 0, then every Blank row (1, 3, 5, 7), then buffer.len() (8).
        // The blank at row 7 == buffer.len()-1 may or may not be
        // present depending on whether depth==0 was reached at that
        // row; check the interior at least.
        assert!(pb.reset_boundaries.contains(&0), "sentinel 0 missing");
        assert!(
            pb.reset_boundaries.contains(&buffer.len()),
            "sentinel lines.len() missing"
        );
        assert!(
            pb.reset_boundaries.contains(&1),
            "blank after paragraph 0 should be a boundary, got {:?}",
            pb.reset_boundaries
        );
        assert!(
            pb.reset_boundaries.contains(&3),
            "blank after paragraph 1 should be a boundary, got {:?}",
            pb.reset_boundaries
        );
    }

    #[test]
    fn expand_to_reset_uses_nearest_sentinels() {
        // Only sentinels [0, 5] in the boundary set — every edit
        // expands to the full buffer.
        let boundaries = vec![0, 5];
        let r = expect_widened(expand_to_reset_boundary(&boundaries, 5, 2..3));
        assert_eq!(r, 0..5);
    }

    #[test]
    fn expand_to_reset_snaps_to_interior_boundaries() {
        // Boundaries at rows 0, 3, 6, 10 (e.g. blank-separated
        // blocks). Damage at row 4 expands to 3..6.
        let boundaries = vec![0, 3, 6, 10];
        let r = expect_widened(expand_to_reset_boundary(&boundaries, 10, 4..5));
        assert_eq!(r, 3..6);
    }

    #[test]
    fn expand_to_reset_damage_at_exact_boundary_is_zero_span() {
        // Damage range coincides with a boundary point. The function
        // returns the smallest enclosing boundary pair.
        let boundaries = vec![0, 3, 6, 10];
        // damaged.start == damaged.end == 6. Expands to 6..6 (empty).
        let r = expect_widened(expand_to_reset_boundary(&boundaries, 10, 6..6));
        assert_eq!(r, 6..6);
    }

    #[test]
    fn expand_to_reset_empty_buffer_falls_back() {
        let boundaries = vec![0];
        assert_eq!(
            expand_to_reset_boundary(&boundaries, 0, 0..0),
            WidenResult::FullRebuild
        );
    }

    #[test]
    fn expand_to_reset_caps_trip_fallback() {
        // 600-row buffer, no interior boundaries. Damage at 300
        // expands to 0..600 which exceeds cap_abs (256) and cap_frac
        // (300, floored at cap_abs).
        let boundaries = vec![0, 600];
        assert_eq!(
            expand_to_reset_boundary(&boundaries, 600, 300..301),
            WidenResult::FullRebuild
        );
    }

    // ---- fence_ranges_from_kinds ------------------------------------------

    #[test]
    fn fence_ranges_single_fence() {
        // P F C C F P — fence covers rows 1..5 (half-open: both markers + content).
        let k = kinds_str("PFCCFP");
        assert_eq!(fence_ranges_from_kinds(&k), vec![1..5]);
    }

    #[test]
    fn fence_ranges_two_fences() {
        // F C F P F C F — two fences at 0..3 and 4..7.
        let k = kinds_str("FCFPFCF");
        assert_eq!(fence_ranges_from_kinds(&k), vec![0..3, 4..7]);
    }

    #[test]
    fn fence_ranges_unclosed_extends_to_end() {
        // P F C C C — unclosed fence runs to end of buffer.
        let k = kinds_str("PFCCC");
        assert_eq!(fence_ranges_from_kinds(&k), vec![1..5]);
    }

    #[test]
    fn fence_ranges_empty() {
        assert!(fence_ranges_from_kinds(&[]).is_empty());
    }

    // ---- investigations -------------------------------------------------

    #[test]
    fn investigate_list_fence_indented_code_interaction() {
        // Diagnostic only: prints the kinds of a full parse before and
        // after a one-character edit inside an unclosed fence and lists
        // the rows whose classification changed.
        // NOTE(review): this test asserts nothing; once the expected
        // classification is settled, turn the printed rows into
        // assertions. Also confirm the leading whitespace of row 7 — the
        // test name suggests it is meant to be indented code.
        //
        // Initial: row 7 " a" is after "- a" (row 1) with 5 blank lines in between.
        // After editing row 9 (blank → space inside fence), fresh parse changes row 7.
        let initial: Vec<String> = vec![
            "".to_string(),    // 0: Blank
            "- a".to_string(), // 1: ListMarker
            "".to_string(),    // 2: Blank
            "".to_string(),    // 3: Blank
            "".to_string(),    // 4: Blank
            "".to_string(),    // 5: Blank
            "".to_string(),    // 6: Blank
            " a".to_string(),  // 7: ? - before fence
            "```".to_string(), // 8: FenceMarker
            "".to_string(),    // 9: FenceContent -> edit to " "
            "".to_string(),    // 10: FenceContent
            "".to_string(),    // 11: FenceContent
            "".to_string(),    // 12: FenceContent
            "".to_string(),    // 13: FenceContent
            "".to_string(),    // 14: FenceContent
            "".to_string(),    // 15: FenceContent
            "".to_string(),    // 16: FenceContent
            "> a".to_string(), // 17: FenceContent
            "".to_string(),    // 18: FenceContent
            "> ".to_string(),  // 19: FenceContent
            "".to_string(),    // 20: FenceContent
            "".to_string(),    // 21: FenceContent
            "".to_string(),    // 22: FenceContent (last row → FenceMarker?)
        ];
        let initial_pb = ParsedBuffer::parse(&initial);
        eprintln!("initial kinds: {:?}", &initial_pb.kinds);

        let mut edited = initial.clone();
        edited[9].push(' ');
        let edited_pb = ParsedBuffer::parse(&edited);
        eprintln!("edited kinds: {:?}", &edited_pb.kinds);

        // Report every row whose classification diverges between the two parses.
        for i in 0..initial.len() {
            if initial_pb.kinds[i] != edited_pb.kinds[i] {
                eprintln!(
                    "Row {} differs: initial={:?}, edited={:?}",
                    i, initial_pb.kinds[i], edited_pb.kinds[i]
                );
            }
        }
    }
}