arity 0.2.0

An LSP, formatter, and linter for R
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
//! Incremental reparse: turn a single text edit plus the previous parse tree
//! into a new tree without re-lexing and re-parsing the whole file.
//!
//! Modelled on rust-analyzer's `reparsing.rs`: try the cheapest strategy first
//! and fall back to progressively more work.
//!
//! 1. [`reparse_token`] — the edit lands inside one leaf token (identifier,
//!    string, comment, whitespace) and re-lexing that token's text yields the
//!    same single token kind. Splice a fresh leaf in place.
//! 2. [`reparse_block`] — the edit lands strictly inside a `{ … }` block. Re-lex
//!    and re-parse just that block (it is self-contained — braces delimit it, so
//!    its subtree never depends on the surrounding context) and splice the new
//!    block subtree in place.
//! 3. Otherwise return `None`; the caller does a full [`crate::parser::parse`].
//!
//! **Correctness invariant (Tenet 4):** a successful reparse must yield a green
//! tree *and* diagnostics byte-identical to a full parse of the edited text.
//! Splicing relies on rowan green nodes being position-independent. The invariant
//! is enforced by `tests/incremental_reparse.rs` (an oracle property test over the
//! corpus) and a `debug_assert!` here.

use std::ops::Range;

use rowan::{GreenNode, GreenToken, TextRange, TextSize};

use crate::parser::bracket_balancer::rebalance_brackets;
use crate::parser::diagnostics::ParseDiagnostic;
use crate::parser::expr::parse_expr;
use crate::parser::lexer::lex;
use crate::parser::tree_builder::{build_tree, syntax_kind_for};
use crate::syntax::{SyntaxKind, SyntaxNode};

/// A single contiguous text edit: replace `range` (a byte range in the *old*
/// text) with `insert`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Edit {
    pub range: Range<usize>,
    pub insert: String,
}

impl Edit {
    /// The signed length change this edit applies to text after `range`.
    fn delta(&self) -> isize {
        self.insert.len() as isize - (self.range.end - self.range.start) as isize
    }

    /// Apply the edit to `old`, producing the new text.
    pub fn apply(&self, old: &str) -> String {
        let mut out =
            String::with_capacity(old.len().saturating_sub(self.range.len()) + self.insert.len());
        out.push_str(&old[..self.range.start]);
        out.push_str(&self.insert);
        out.push_str(&old[self.range.end..]);
        out
    }
}

/// Map a `TextRange` taken against the text *before* `edit` to its position in
/// the text *after* `edit`.
///
/// Returns `None` when the edit's replaced range overlaps the node's interior:
/// a `(kind, range)` handle cannot promise the node survived an edit inside it
/// (its text, and possibly its kind, may have changed). Comparisons are
/// half-open, so the boundary cases are deterministic: an edit ending exactly at
/// the node's start shifts the node (it survives), an edit starting exactly at
/// the node's end leaves it unchanged (it survives), and an insertion strictly
/// inside the node returns `None`. A zero-length node sitting at an insertion
/// point is treated as preceding the insertion (unchanged).
///
/// Note: [`parsed_document`](crate::incremental::parsed_document) recovers a
/// single spanning [`diff_edit`], so several disjoint keystrokes coalesce into
/// one wide edit. That only widens the overlap and yields `None` more often
/// (more position-based re-resolution by the caller) — it never produces a wrong
/// mapping. Feeding precise per-change LSP edits would shrink the edit and
/// reduce those fallbacks.
pub fn map_range_through_edit(range: TextRange, edit: &Edit) -> Option<TextRange> {
    let node_start = usize::from(range.start());
    let node_end = usize::from(range.end());

    if node_end <= edit.range.start {
        // Node entirely before the edit: untouched.
        Some(range)
    } else if node_start >= edit.range.end {
        // Node entirely after the edit: every offset shifts by the length delta.
        let start = (node_start as isize + edit.delta()) as usize;
        let end = (node_end as isize + edit.delta()) as usize;
        Some(text_range(start, end))
    } else {
        // The edit's replaced range intersects the node's interior: invalidated.
        None
    }
}

/// Map a range through a sequence of edits applied left-to-right (the order
/// [`Edit::apply`] would run them — each edit is expressed against the text
/// produced by its predecessors). Any single invalidating edit collapses the
/// whole result to `None`.
pub fn map_range_through_edits(range: TextRange, edits: &[Edit]) -> Option<TextRange> {
    edits.iter().try_fold(range, map_range_through_edit)
}

/// Which strategy produced a [`Reparsed`]. Surfaced for tests and benchmarks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReparseKind {
    Token,
    Block,
}

/// The result of a successful incremental reparse: the new whole-file green tree
/// and its parse diagnostics (absolute offsets in the new text).
#[derive(Debug, Clone)]
pub struct Reparsed {
    pub green: GreenNode,
    pub diagnostics: Vec<ParseDiagnostic>,
    pub kind: ReparseKind,
}

/// Recover a single contiguous [`Edit`] (deleted range in `old` + inserted text)
/// from a pair of full texts by stripping the common prefix and suffix. Multiple
/// disjoint edits collapse into one spanning edit — still a correct transform,
/// just coarser. Boundaries are clamped to char boundaries.
pub fn diff_edit(old: &str, new: &str) -> Edit {
    let ob = old.as_bytes();
    let nb = new.as_bytes();

    let mut prefix = 0;
    let max_prefix = ob.len().min(nb.len());
    while prefix < max_prefix && ob[prefix] == nb[prefix] {
        prefix += 1;
    }
    while prefix > 0 && !old.is_char_boundary(prefix) {
        prefix -= 1;
    }

    let mut suffix = 0;
    let max_suffix = (ob.len() - prefix).min(nb.len() - prefix);
    while suffix < max_suffix && ob[ob.len() - 1 - suffix] == nb[nb.len() - 1 - suffix] {
        suffix += 1;
    }
    while suffix > 0
        && (!old.is_char_boundary(old.len() - suffix) || !new.is_char_boundary(new.len() - suffix))
    {
        suffix -= 1;
    }

    Edit {
        range: prefix..(old.len() - suffix),
        insert: new[prefix..(new.len() - suffix)].to_string(),
    }
}

/// Attempt an incremental reparse of `old_root` (parsed from `old_text`, with
/// `old_diags`) under `edit`. Returns `None` when no incremental strategy applies
/// — the caller must then do a full parse.
pub fn reparse(
    old_root: &SyntaxNode,
    old_text: &str,
    old_diags: &[ParseDiagnostic],
    edit: &Edit,
) -> Option<Reparsed> {
    let result = reparse_token(old_root, old_text, old_diags, edit)
        .or_else(|| reparse_block(old_root, old_text, old_diags, edit));

    if let Some(reparsed) = &result {
        debug_assert_eq!(
            SyntaxNode::new_root(reparsed.green.clone())
                .text()
                .to_string(),
            edit.apply(old_text),
            "incremental reparse ({:?}) is not lossless",
            reparsed.kind,
        );
    }
    result
}

const TOKEN_REPARSE_KINDS: &[SyntaxKind] = &[
    SyntaxKind::IDENT,
    SyntaxKind::STRING,
    SyntaxKind::COMMENT,
    SyntaxKind::WHITESPACE,
];

fn reparse_token(
    old_root: &SyntaxNode,
    old_text: &str,
    old_diags: &[ParseDiagnostic],
    edit: &Edit,
) -> Option<Reparsed> {
    let (s, e) = (edit.range.start, edit.range.end);
    let elem = old_root.covering_element(text_range(s, e));
    let token = elem.into_token()?;
    if !TOKEN_REPARSE_KINDS.contains(&token.kind()) {
        return None;
    }

    let tr = token.text_range();
    let (t0, t1) = (usize::from(tr.start()), usize::from(tr.end()));
    // A pure insertion must land strictly inside the token, so it is
    // unambiguous which token it extends. Edits with a non-empty range that
    // span a token boundary never reach here — `covering_element` returns the
    // parent node, not a token.
    if s == e && !(t0 < s && s < t1) {
        return None;
    }

    // The token's new text, with the edit applied at its relative offset.
    let mut new_text = token.text().to_string();
    new_text.replace_range((s - t0)..(e - t0), &edit.insert);

    // Re-lex in isolation: it must still be exactly one token of the same kind.
    let relexed = lex(&new_text);
    let [only] = relexed.as_slice() else {
        return None;
    };
    if only.start != 0 || only.end != new_text.len() || syntax_kind_for(&only.kind) != token.kind()
    {
        return None;
    }

    // Merge guard: appending the following source character must not extend the
    // token (which would mean the edit merged it with its neighbour, e.g.
    // `for`→`fore`, or an unterminated string swallowing the next line).
    if let Some(next_char) = old_text[t1..].chars().next() {
        let mut probe = new_text.clone();
        probe.push(next_char);
        let probe_toks = lex(&probe);
        let first = probe_toks.first()?;
        if first.end != new_text.len() || syntax_kind_for(&first.kind) != token.kind() {
            return None;
        }
    }

    // Keep diagnostic remapping trivial: only proceed when no diagnostic touches
    // the edited token. A clean single-token relex introduces none of its own.
    if old_diags.iter().any(|d| d.start < t1 && d.end > t0) {
        return None;
    }
    let delta = edit.delta();
    let diagnostics = old_diags
        .iter()
        .map(|d| {
            if d.start >= t1 {
                shift(d, delta)
            } else {
                d.clone()
            }
        })
        .collect();

    let green_token = GreenToken::new(rowan::SyntaxKind::from(token.kind()), &new_text);
    let green = token.replace_with(green_token);
    Some(Reparsed {
        green,
        diagnostics,
        kind: ReparseKind::Token,
    })
}

fn reparse_block(
    old_root: &SyntaxNode,
    old_text: &str,
    old_diags: &[ParseDiagnostic],
    edit: &Edit,
) -> Option<Reparsed> {
    let (s, e) = (edit.range.start, edit.range.end);
    let elem = old_root.covering_element(text_range(s, e));
    let start_node = match elem {
        rowan::NodeOrToken::Node(n) => n,
        rowan::NodeOrToken::Token(t) => t.parent()?,
    };

    // Smallest enclosing `{ … }` whose *interior* (between the braces) fully
    // contains the edit, so the brace delimiters themselves are untouched.
    let block = start_node.ancestors().find(|node| {
        if node.kind() != SyntaxKind::BLOCK_EXPR {
            return false;
        }
        let r = node.text_range();
        let (bstart, bend) = (usize::from(r.start()), usize::from(r.end()));
        // First/last tokens are `{`/`}` (one byte each); require interior containment.
        bend.saturating_sub(bstart) >= 2 && s > bstart && e < bend
    })?;

    let r = block.text_range();
    let (bstart, bend) = (usize::from(r.start()), usize::from(r.end()));

    // New block text: the old block slice with the edit applied at its relative
    // offset. The edit is interior, so the slice still starts with `{`/ends `}`.
    let mut block_text = old_text[bstart..bend].to_string();
    block_text.replace_range((s - bstart)..(e - bstart), &edit.insert);

    let (block_green, block_diags) = parse_block_in_isolation(&block_text)?;

    let delta = edit.delta();
    let mut diagnostics: Vec<ParseDiagnostic> =
        Vec::with_capacity(old_diags.len() + block_diags.len());
    for d in old_diags {
        if d.end <= bstart {
            diagnostics.push(d.clone());
        } else if d.start >= bend {
            diagnostics.push(shift(d, delta));
        }
        // diagnostics inside the old block are dropped; the reparse regenerates them
    }
    for d in &block_diags {
        diagnostics.push(ParseDiagnostic {
            message: d.message.clone(),
            start: d.start + bstart,
            end: d.end + bstart,
        });
    }
    diagnostics.sort_by_key(|d| (d.start, d.end));

    let green = block.replace_with(block_green);
    Some(Reparsed {
        green,
        diagnostics,
        kind: ReparseKind::Block,
    })
}

/// Parse `{ … }` text on its own and return the green node for the single
/// `BLOCK_EXPR` it produces, with block-relative diagnostics. Returns `None` if
/// the text does not parse to exactly one block consuming all tokens (e.g. the
/// edit transiently unbalanced the braces) — the caller falls back.
fn parse_block_in_isolation(block_text: &str) -> Option<(GreenNode, Vec<ParseDiagnostic>)> {
    let tokens = rebalance_brackets(lex(block_text));
    let mut diagnostics = Vec::new();
    let expr = parse_expr(&tokens, 0, 0, &mut diagnostics)?;
    if expr.start != 0 || expr.end != tokens.len() {
        return None;
    }

    let root = build_tree(&tokens, &expr.events);
    let mut children = root.children();
    let block = children.next()?;
    if children.next().is_some() || block.kind() != SyntaxKind::BLOCK_EXPR {
        return None;
    }

    // The block must still be properly brace-delimited by its own *direct*
    // delimiters. If the edit opened an unterminated string/comment or an
    // unclosed inner `(`/`[` that swallowed the closing `}` (re-parenting it
    // under a child, or leaking past the old block boundary in a full parse),
    // the block is no longer self-contained — fall back.
    let first = block.first_child_or_token().map(|e| e.kind());
    let last = block.last_child_or_token().map(|e| e.kind());
    if first != Some(SyntaxKind::LBRACE) || last != Some(SyntaxKind::RBRACE) {
        return None;
    }

    Some((block.green().into_owned(), diagnostics))
}

fn shift(d: &ParseDiagnostic, delta: isize) -> ParseDiagnostic {
    ParseDiagnostic {
        message: d.message.clone(),
        start: (d.start as isize + delta) as usize,
        end: (d.end as isize + delta) as usize,
    }
}

fn text_range(start: usize, end: usize) -> TextRange {
    TextRange::new(TextSize::new(start as u32), TextSize::new(end as u32))
}

#[cfg(test)]
mod map_range_tests {
    use super::*;

    fn edit(start: usize, end: usize, insert: &str) -> Edit {
        Edit {
            range: start..end,
            insert: insert.to_string(),
        }
    }

    #[test]
    fn node_before_edit_is_unchanged() {
        let node = text_range(0, 5);
        assert_eq!(
            map_range_through_edit(node, &edit(10, 12, "xxxx")),
            Some(node)
        );
    }

    #[test]
    fn node_after_insertion_shifts_by_delta() {
        let node = text_range(10, 15);
        // Insert 4 chars where 2 stood before the node: net +2.
        assert_eq!(
            map_range_through_edit(node, &edit(0, 2, "xxxx")),
            Some(text_range(12, 17))
        );
    }

    #[test]
    fn node_after_deletion_shifts_back() {
        let node = text_range(10, 15);
        assert_eq!(
            map_range_through_edit(node, &edit(0, 4, "")),
            Some(text_range(6, 11))
        );
    }

    #[test]
    fn edit_ending_at_node_start_shifts_node() {
        // ee == ns: the node survives and shifts by the delta.
        let node = text_range(10, 15);
        assert_eq!(
            map_range_through_edit(node, &edit(8, 10, "abcd")),
            Some(text_range(12, 17))
        );
    }

    #[test]
    fn insertion_at_node_start_shifts_node() {
        let node = text_range(10, 15);
        assert_eq!(
            map_range_through_edit(node, &edit(10, 10, "ab")),
            Some(text_range(12, 17))
        );
    }

    #[test]
    fn insertion_at_node_end_leaves_node_unchanged() {
        let node = text_range(10, 15);
        assert_eq!(
            map_range_through_edit(node, &edit(15, 15, "ab")),
            Some(node)
        );
    }

    #[test]
    fn insertion_strictly_inside_invalidates() {
        let node = text_range(10, 15);
        assert_eq!(map_range_through_edit(node, &edit(12, 12, "z")), None);
    }

    #[test]
    fn overlapping_deletion_invalidates() {
        let node = text_range(10, 15);
        assert_eq!(map_range_through_edit(node, &edit(12, 18, "")), None);
    }

    #[test]
    fn zero_length_node_at_insertion_point_is_unchanged() {
        // Deterministic boundary rule: a zero-length node at the insertion point
        // is treated as preceding the insertion.
        let node = text_range(10, 10);
        assert_eq!(
            map_range_through_edit(node, &edit(10, 10, "ab")),
            Some(node)
        );
    }

    #[test]
    fn append_at_eof_leaves_earlier_node_unchanged() {
        let node = text_range(0, 5);
        assert_eq!(
            map_range_through_edit(node, &edit(5, 5, "\n# trailing")),
            Some(node)
        );
    }

    #[test]
    fn map_through_edits_folds_in_order() {
        let node = text_range(10, 15);
        // Two insertions before the node, each expressed against the prior text.
        let edits = [edit(0, 0, "ab"), edit(0, 0, "cd")];
        assert_eq!(
            map_range_through_edits(node, &edits),
            Some(text_range(14, 19))
        );
    }

    #[test]
    fn map_through_edits_invalidates_on_any_overlap() {
        let node = text_range(10, 15);
        // After the first edit the node is at [12, 17); the second lands inside.
        let edits = [edit(0, 0, "ab"), edit(13, 13, "z")];
        assert_eq!(map_range_through_edits(node, &edits), None);
    }
}