normalize-refactor 0.3.2

Composable refactoring engine for normalize
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
//! Semantic actions: query and mutation primitives for refactoring recipes.
//!
//! **Query actions** return data without side effects.
//! **Mutation actions** produce `PlannedEdit`s without touching the filesystem.

use std::collections::HashSet;
use std::path::Path;

use normalize_edit::SymbolLocation;
use normalize_languages::parsers::{grammar_loader, parse_with_grammar};
use normalize_languages::satisfies_predicates;
use normalize_languages::support_for_path;
use tree_sitter::StreamingIterator as _;

use crate::{CallerRef, ImportRef, PlannedEdit, RefactoringContext, References};

// ── Query actions ────────────────────────────────────────────────────

/// Look up the symbol called `name` inside `file`, whose text is `content`.
///
/// This is a thin query wrapper: it delegates to the context's editor
/// (`find_symbol`, with its final flag set to `false`) and hands back whatever
/// the editor reports — `None` when the editor finds no such symbol.
pub fn locate_symbol(
    ctx: &RefactoringContext,
    file: &Path,
    content: &str,
    name: &str,
) -> Option<SymbolLocation> {
    let editor = &ctx.editor;
    editor.find_symbol(file, content, name, false)
}

/// Non-comment tree-sitter node kinds that are treated as leading decorations
/// of a definition (attributes, decorators, annotations, pragmas).
///
/// Comment nodes are deliberately absent from this table: they are recognised
/// by `is_decoration_kind` through a substring test on the kind name instead.
const DECORATION_KINDS: &[&str] = &[
    // Rust
    "attribute_item",       // outer attribute `#[...]`
    "inner_attribute_item", // inner attribute `#![...]`
    "meta_item",            // attribute body
    // C# / generic
    "attribute",
    "attribute_list",
    // Python, JS/TS
    "decorator",
    "decorator_list", // grouped decorators
    // Java / Kotlin
    "annotation",
    "marker_annotation",
    "modifiers", // annotations live under `modifiers`
    // C / C++
    "pragma",
    "preproc_call", // preprocessor lines like `#pragma`
];

/// Returns `true` when `kind` names a decoration node: either one of the
/// entries in `DECORATION_KINDS`, or any kind whose name contains `"comment"`.
fn is_decoration_kind(kind: &str) -> bool {
    let listed = DECORATION_KINDS.iter().any(|&known| known == kind);
    listed || kind.contains("comment")
}

/// Node kinds under which a grammar groups a definition with its decorations
/// (decorators, attributes, export modifier, ...).
///
/// If the symbol node's parent has one of these kinds, comments that lead the
/// definition are siblings of the *wrapper* rather than of the symbol itself,
/// so a backward sibling scan has to step out to the wrapper to see them.
const DECORATION_WRAPPER_KINDS: &[&str] = &[
    // Python: `@decorator\ndef foo()` / `@decorator\nclass Foo`
    "decorated_definition",
    // TypeScript/JavaScript: `export function foo()` / `export class Foo`
    "export_statement",
    // TypeScript/JavaScript: `export default class Foo`
    "export_default_declaration",
    // TypeScript: `declare ...`
    "ambient_declaration",
];

/// Returns `true` when `kind` is listed in `DECORATION_WRAPPER_KINDS`.
fn is_decoration_wrapper_kind(kind: &str) -> bool {
    DECORATION_WRAPPER_KINDS
        .iter()
        .any(|&wrapper| wrapper == kind)
}

/// Walk backward from the symbol's node through preceding named siblings,
/// collecting decoration nodes (doc comments, attributes, decorators, etc.).
/// Returns `(byte_offset, warning)` where:
/// - `byte_offset` is the line-start of the earliest decoration found, or
///   `loc.start_byte` if there are no decorations or no grammar is available.
/// - `warning` is `Some(msg)` when the function fell back because the grammar
///   was unavailable; `None` when the grammar was used (even if no decorations
///   were found).
///
/// Classification is by `node.kind()` from the grammar — never by source text.
pub fn decoration_extended_start(
    file: &Path,
    content: &str,
    loc: &SymbolLocation,
    // normalize-syntax-allow: rust/tuple-return
) -> (usize, Option<String>) {
    let fallback = loc.start_byte;
    let Some(support) = support_for_path(file) else {
        let ext = file
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or("<unknown>");
        return (
            fallback,
            Some(format!(
                "No language support for {ext}: doc comments and attributes will not be included with the moved symbol"
            )),
        );
    };
    let grammar = support.grammar_name();
    let Some(tree) = parse_with_grammar(grammar, content) else {
        return (
            fallback,
            Some(format!(
                "Grammar for {grammar} not loaded: doc comments and attributes will not be included. Install grammars with `normalize grammars install`."
            )),
        );
    };

    let root = tree.root_node();
    // The symbol's def node — descendant_for_byte_range returns the smallest
    // node containing the range. Using the full [start, end) of the symbol can
    // overshoot the def node when end_byte is set to the start of the line
    // after the symbol (a common convention) — that byte may not lie within
    // the def node, forcing us up to `module`. Use a point query at the start
    // byte to anchor on the def itself; we then walk up to find the outermost
    // ancestor that begins at the same byte.
    let sym_start = loc.start_byte.min(content.len());
    let Some(mut node) = root.descendant_for_byte_range(sym_start, sym_start) else {
        return (fallback, None);
    };

    // descendant_for_byte_range may return a small inner node (e.g. an identifier)
    // when the symbol's start byte is line-aligned. Walk up to the outermost
    // ancestor whose start_byte equals the matched node's start_byte — this is
    // the def/declaration node we want preceding-sibling info for.
    while let Some(parent) = node.parent() {
        if parent.start_byte() == node.start_byte() && parent.id() != root.id() {
            node = parent;
        } else {
            break;
        }
    }

    // Build the set of decoration node IDs using the decorations query when
    // available, falling back to the hardcoded kind list otherwise.
    let loader = grammar_loader();
    let decoration_ids: Option<HashSet<usize>> = loader.get_decorations(grammar).and_then(|q| {
        let compiled = loader.get_compiled_query(grammar, "decorations", &q)?;
        let mut qcursor = tree_sitter::QueryCursor::new();
        let mut matches = qcursor.matches(&compiled, root, content.as_bytes());
        let mut ids = HashSet::new();
        let source_bytes = content.as_bytes();
        while let Some(m) = matches.next() {
            if !satisfies_predicates(&compiled, m, source_bytes) {
                continue;
            }
            for capture in m.captures {
                ids.insert(capture.node.id());
            }
        }
        Some(ids)
    });

    let is_decoration = |n: tree_sitter::Node<'_>| -> bool {
        if let Some(ref ids) = decoration_ids {
            ids.contains(&n.id())
        } else {
            is_decoration_kind(n.kind())
        }
    };

    // Walk preceding named siblings while they classify as decorations.
    //
    // Some grammars wrap a definition together with its decorators/attributes
    // under a single node (e.g. Python `decorated_definition`, TS `export_statement`).
    // When we exhaust prev siblings within that wrapper, climb to the wrapper
    // and continue scanning siblings of the wrapper itself — leading comments
    // live there, not under the wrapper.
    let initial_start = node.start_byte();
    let mut earliest_start = initial_start;
    let mut cursor = node;
    loop {
        while let Some(prev) = cursor.prev_named_sibling() {
            if !is_decoration(prev) {
                // Encountered a non-decoration sibling; stop entirely.
                return finalize(content, earliest_start, initial_start, fallback);
            }
            // Only include if the gap between `prev` and the decoration block we've
            // already accepted is whitespace-only (no intervening code/punctuation).
            let gap = &content.as_bytes()[prev.end_byte()..earliest_start];
            if !gap.iter().all(|b| b.is_ascii_whitespace()) {
                return finalize(content, earliest_start, initial_start, fallback);
            }
            earliest_start = prev.start_byte();
            cursor = prev;
        }
        // No more prev siblings inside the current scope. If the parent is a
        // known decoration-wrapper, step out to the wrapper and keep scanning.
        let Some(parent) = cursor.parent() else { break };
        if parent.id() == root.id() || !is_decoration_wrapper_kind(parent.kind()) {
            break;
        }
        // The wrapper's leading content (everything up to its first child) is
        // part of the symbol's surface — walk wrapper's prev siblings next.
        cursor = parent;
    }
    finalize(content, earliest_start, initial_start, fallback)
}

/// Turn the result of the backward decoration scan into the
/// `(byte_offset, warning)` pair returned to callers.
///
/// - `earliest_start == initial_start` means no decoration was accepted, so
///   the caller-supplied `fallback` offset is returned unchanged.
/// - Otherwise the offset is moved back to the start of the line containing
///   `earliest_start`, so the decoration's indentation is captured — but only
///   when everything between the line start and `earliest_start` is ASCII
///   whitespace. If other text precedes the decoration on that line (for
///   example a trailing comment after a statement, or a one-line
///   `class A { @log m() {} }`), `earliest_start` itself is returned so that
///   unrelated code is never pulled into the symbol's range.
///
/// The warning component is always `None`.
///
/// # Panics
///
/// Panics if `earliest_start` differs from `initial_start` and exceeds
/// `content.len()`.
fn finalize(
    content: &str,
    earliest_start: usize,
    initial_start: usize,
    fallback: usize,
    // normalize-syntax-allow: rust/tuple-return
) -> (usize, Option<String>) {
    if earliest_start == initial_start {
        return (fallback, None);
    }

    // Work on bytes: '\n' is a single byte in UTF-8, and byte slicing cannot
    // fail on a char boundary the way `str` slicing can.
    let bytes = content.as_bytes();
    let line_start = bytes[..earliest_start]
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);

    // Snap to the line start only when that merely adds indentation.
    let indent_only = bytes[line_start..earliest_start]
        .iter()
        .all(u8::is_ascii_whitespace);
    let start = if indent_only {
        line_start
    } else {
        earliest_start
    };
    (start, None)
}

/// Find all cross-file references to a symbol (callers + importers).
///
/// Returns empty references if no index is available.
///
/// Each reference is tagged with a `confidence` field:
/// - `"resolved"` — backed by `ModuleResolver` import resolution (accurate)
/// - `"heuristic"` — found via import-name matching without full resolution (may have false positives)
pub async fn find_references(
    ctx: &RefactoringContext,
    symbol_name: &str,
    def_file: &str,
) -> References {
    let Some(ref idx) = ctx.index else {
        return References {
            callers: vec![],
            importers: vec![],
        };
    };

    // Determine confidence level based on whether the def_file's language has a resolver.
    // If it does, imports for that language were resolved via ModuleResolver; results are
    // accurate. If not, results are heuristic (import-name matching only).
    let confidence: &'static str = {
        let def_path = ctx.root.join(def_file);
        if support_for_path(&def_path)
            .and_then(|lang| lang.module_resolver())
            .is_some()
        {
            "resolved"
        } else {
            "heuristic"
        }
    };

    let callers = idx
        .find_callers(symbol_name, def_file)
        .await
        .unwrap_or_default()
        .into_iter()
        .map(|(file, caller, line, access)| CallerRef {
            file,
            caller,
            line,
            access,
            confidence,
        })
        .collect();

    let importers = idx
        .find_symbol_importers(symbol_name)
        .await
        .unwrap_or_default()
        .into_iter()
        .map(|(file, name, alias, line)| ImportRef {
            file,
            name,
            alias,
            line,
            confidence,
        })
        .collect();

    References { callers, importers }
}

/// Report the naming clashes that renaming a symbol to `new_name` would cause.
///
/// Two checks are performed:
/// 1. `new_name` already resolves to a symbol in the definition file.
/// 2. A file listed in `importers` already imports something called
///    `new_name` (only checked when the context has an index; a failed index
///    lookup counts as "no clash").
///
/// Returns one human-readable message per clash; an empty vector means the
/// rename is conflict-free as far as these checks can tell.
pub async fn check_conflicts(
    ctx: &RefactoringContext,
    def_file: &Path,
    def_content: &str,
    new_name: &str,
    importers: &[ImportRef],
) -> Vec<String> {
    let mut conflicts = Vec::new();

    // Check 1: the definition file itself.
    let already_defined = ctx
        .editor
        .find_symbol(def_file, def_content, new_name, false)
        .is_some();
    if already_defined {
        // Prefer a path relative to the project root in the message.
        let rel = def_file
            .strip_prefix(&ctx.root)
            .unwrap_or(def_file)
            .to_string_lossy();
        conflicts.push(format!("{}: symbol '{}' already exists", rel, new_name));
    }

    // Check 2: each importing file (a no-op for an empty `importers` slice).
    if let Some(ref idx) = ctx.index {
        for imp in importers {
            let clashes = idx
                .has_import_named(&imp.file, new_name)
                .await
                .unwrap_or(false);
            if clashes {
                conflicts.push(format!("{}: already imports '{}'", imp.file, new_name));
            }
        }
    }

    conflicts
}

// ── Mutation actions ─────────────────────────────────────────────────

/// Plan the rename of `old_name` to `new_name` on the given `lines` of a file.
///
/// The renames are applied one line at a time to a working copy of `content`,
/// each building on the result of the previous one, and are folded into a
/// single `PlannedEdit` for the file. Lines on which the editor finds nothing
/// to rename are skipped.
///
/// Returns `None` when no line produced a change (e.g. stale index data).
pub fn plan_rename_in_file(
    ctx: &RefactoringContext,
    file: &Path,
    content: &str,
    lines: &[usize],
    old_name: &str,
    new_name: &str,
) -> Option<PlannedEdit> {
    let mut current = content.to_owned();
    let mut renamed_any = false;

    for line_no in lines.iter().copied() {
        let attempt = ctx
            .editor
            .rename_identifier_in_line(&current, line_no, old_name, new_name);
        if let Some(updated) = attempt {
            current = updated;
            renamed_any = true;
        }
    }

    if !renamed_any {
        return None;
    }

    Some(PlannedEdit {
        file: file.to_path_buf(),
        original: content.to_string(),
        new_content: current,
        description: format!("{} -> {}", old_name, new_name),
    })
}

/// Plan the removal of the symbol at `loc` from `file`.
///
/// The editor computes the post-deletion text from `content`; nothing is
/// written here — the result is only described as a `PlannedEdit`.
pub fn plan_delete_symbol(
    ctx: &RefactoringContext,
    file: &Path,
    content: &str,
    loc: &SymbolLocation,
) -> PlannedEdit {
    let description = format!("delete {}", loc.name);
    PlannedEdit {
        file: file.to_path_buf(),
        original: content.to_string(),
        new_content: ctx.editor.delete_symbol(content, loc),
        description,
    }
}

/// Plan the insertion of `code` immediately before or after the symbol at `loc`.
///
/// `position` selects which editor operation produces the new text; the
/// resulting `PlannedEdit` records the side in its description
/// (`"insert before <name>"` / `"insert after <name>"`).
pub fn plan_insert(
    ctx: &RefactoringContext,
    file: &Path,
    content: &str,
    loc: &SymbolLocation,
    position: InsertPosition,
    code: &str,
) -> PlannedEdit {
    // Resolve the label and the edited text for the chosen side in one go.
    let (pos_str, new_content) = match position {
        InsertPosition::Before => ("before", ctx.editor.insert_before(content, loc, code)),
        InsertPosition::After => ("after", ctx.editor.insert_after(content, loc, code)),
    };

    PlannedEdit {
        file: file.to_path_buf(),
        original: content.to_string(),
        new_content,
        description: format!("insert {} {}", pos_str, loc.name),
    }
}

/// Plan swapping the symbol at `loc` for `new_code`.
///
/// The editor computes the replaced text from `content`; the outcome is
/// returned as a `PlannedEdit` and nothing is written here.
pub fn plan_replace_symbol(
    ctx: &RefactoringContext,
    file: &Path,
    content: &str,
    loc: &SymbolLocation,
    new_code: &str,
) -> PlannedEdit {
    let description = format!("replace {}", loc.name);
    PlannedEdit {
        file: file.to_path_buf(),
        original: content.to_string(),
        new_content: ctx.editor.replace_symbol(content, loc, new_code),
        description,
    }
}

/// Where to insert code relative to a symbol.
///
/// Used by `plan_insert` to choose between inserting ahead of the symbol and
/// inserting behind it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InsertPosition {
    /// Insert the code before the symbol.
    Before,
    /// Insert the code after the symbol.
    After,
}

#[cfg(test)]
mod tests {
    use super::*;
    use normalize_edit::Editor;

    /// Rust fixture with a definition of `old_func` (line 1) and one call to
    /// it (line 2).
    const RENAME_FIXTURE: &str = "fn old_func() {}\nfn other() { old_func(); }\n";

    /// Rust fixture containing the single function `my_func`.
    const SINGLE_FN_FIXTURE: &str = "fn my_func() {}\n";

    /// Build a context rooted at `root` with a fresh editor and no index.
    fn make_ctx(root: &Path) -> RefactoringContext {
        RefactoringContext {
            root: root.to_path_buf(),
            editor: Editor::new(),
            index: None,
            loader: normalize_languages::GrammarLoader::new(),
        }
    }

    /// Returns true if the named external grammar can be loaded; tests that need
    /// a grammar should `return` early when this is false to avoid spurious failures
    /// in environments without `NORMALIZE_GRAMMAR_PATH` configured.
    fn grammar_available(name: &str) -> bool {
        normalize_languages::parsers::parser_for(name).is_some()
    }

    /// Write `content` to `file` and locate `name` in it with a fresh editor.
    /// Panics if the file cannot be written or the symbol is not found.
    fn write_and_locate(file: &Path, content: &str, name: &str) -> SymbolLocation {
        std::fs::write(file, content).unwrap();
        Editor::new()
            .find_symbol(file, content, name, false)
            .expect("locate")
    }

    // ── plan_rename_in_file ──────────────────────────────────────────

    #[test]
    fn plan_rename_single_line() {
        let dir = tempfile::tempdir().unwrap();
        let ctx = make_ctx(dir.path());
        let file = dir.path().join("test.rs");

        let edit = plan_rename_in_file(&ctx, &file, RENAME_FIXTURE, &[1], "old_func", "new_func")
            .expect("line 1 should be renamed");

        assert!(edit.new_content.contains("new_func"));
        // Line 2 was not requested, so the call site keeps the old name.
        assert!(edit.new_content.contains("old_func"));
    }

    #[test]
    fn plan_rename_multiple_lines() {
        let dir = tempfile::tempdir().unwrap();
        let ctx = make_ctx(dir.path());
        let file = dir.path().join("test.rs");

        let edit = plan_rename_in_file(
            &ctx,
            &file,
            RENAME_FIXTURE,
            &[1, 2],
            "old_func",
            "new_func",
        )
        .expect("both lines should be renamed");

        assert!(!edit.new_content.contains("old_func"));
    }

    #[test]
    fn plan_rename_no_match_returns_none() {
        let dir = tempfile::tempdir().unwrap();
        let ctx = make_ctx(dir.path());
        let file = dir.path().join("test.rs");

        let edit = plan_rename_in_file(
            &ctx,
            &file,
            "fn something() {}\n",
            &[1],
            "nonexistent",
            "new_name",
        );

        assert!(edit.is_none());
    }

    // ── locate_symbol ────────────────────────────────────────────────

    #[test]
    fn locate_symbol_found() {
        let dir = tempfile::tempdir().unwrap();
        let ctx = make_ctx(dir.path());
        let file = dir.path().join("test.rs");
        std::fs::write(&file, SINGLE_FN_FIXTURE).unwrap();

        let loc = locate_symbol(&ctx, &file, SINGLE_FN_FIXTURE, "my_func");

        assert!(loc.is_some());
        assert_eq!(loc.unwrap().name, "my_func");
    }

    #[test]
    fn locate_symbol_not_found() {
        let dir = tempfile::tempdir().unwrap();
        let ctx = make_ctx(dir.path());
        let file = dir.path().join("test.rs");
        std::fs::write(&file, SINGLE_FN_FIXTURE).unwrap();

        let loc = locate_symbol(&ctx, &file, SINGLE_FN_FIXTURE, "nonexistent");

        assert!(loc.is_none());
    }

    // ── decoration_extended_start ────────────────────────────────────

    #[test]
    fn decoration_python_decorator_and_comment() {
        if !grammar_available("python") {
            eprintln!("skipping: python grammar not available");
            return;
        }
        let content = "\
import x

# Leading comment line 1.
# Leading comment line 2.
@decorator
@other_decorator
def my_func():
    pass
";
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("test.py");
        let loc = write_and_locate(&file, content, "my_func");

        let (start, warning) = decoration_extended_start(&file, content, &loc);

        assert!(warning.is_none(), "unexpected warning: {:?}", warning);
        // The extended range must open at the first leading comment and
        // cover both decorators.
        let slice = &content[start..];
        assert!(
            slice.starts_with("# Leading comment line 1.\n"),
            "expected leading comments + decorators included; got: {:?}",
            slice
        );
        assert!(slice.contains("@decorator\n"));
        assert!(slice.contains("@other_decorator\n"));
    }

    #[test]
    fn decoration_python_no_decoration_returns_original() {
        if !grammar_available("python") {
            eprintln!("skipping: python grammar not available");
            return;
        }
        let content = "def alone():\n    pass\n";
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("test.py");
        let loc = write_and_locate(&file, content, "alone");

        let (start, warning) = decoration_extended_start(&file, content, &loc);

        assert!(warning.is_none(), "unexpected warning: {:?}", warning);
        assert_eq!(start, loc.start_byte);
    }

    #[test]
    fn decoration_javascript_decorator() {
        if !grammar_available("javascript") {
            eprintln!("skipping: javascript grammar not available");
            return;
        }
        let content = "\
// Leading comment.
class Wrapper {
  @log
  myMethod() {}
}
";
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("test.js");
        let loc = write_and_locate(&file, content, "myMethod");

        let (start, warning) = decoration_extended_start(&file, content, &loc);

        assert!(warning.is_none(), "unexpected warning: {:?}", warning);
        // Ignoring the indentation in front of it, the extended range must
        // begin with the `@log` decorator.
        let slice = &content[start..];
        assert!(
            slice.trim_start().starts_with("@log"),
            "expected @log decorator included; got: {:?}",
            slice
        );
    }

    #[test]
    fn decoration_unsupported_language_falls_back() {
        // Path with no registered grammar — should return loc.start_byte unchanged.
        let content = "anything here";
        let file = std::path::PathBuf::from("test.unknown_ext_xyz");
        let loc = SymbolLocation {
            name: "x".to_string(),
            kind: "function".to_string(),
            start_byte: 5,
            end_byte: 10,
            start_line: 1,
            end_line: 1,
            indent: String::new(),
        };

        let (start, warning) = decoration_extended_start(&file, content, &loc);

        assert_eq!(start, 5);
        let warning = warning.expect("expected a warning for unsupported language");
        assert!(
            warning.contains("unknown_ext_xyz"),
            "warning should mention the extension"
        );
    }
}