Skip to main content

normalize_edit/
lib.rs

1//! Symbol-aware file editing for normalize.
2//!
3//! Provides utilities to locate symbols within source files and apply targeted
4//! text replacements — used by `normalize edit` to rewrite functions, methods,
5//! and other named constructs without touching the rest of the file.
6
7use normalize_facts::{Extractor, Symbol};
8use normalize_languages::parsers::{grammar_loader, parse_with_grammar};
9use normalize_languages::{Language, support_for_path};
10use std::path::Path;
11use streaming_iterator::StreamingIterator;
12
13pub use normalize_languages::ContainerBody;
14
/// Where a named symbol lives inside a source file.
///
/// Produced by [`Editor::find_symbol`] and consumed by the editing methods on
/// [`Editor`] (delete / replace / insert).
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)] // Fields used by Debug trait and for edit operations
pub struct SymbolLocation {
    /// Symbol name as reported by the extractor.
    pub name: String,
    /// Symbol kind rendered as a string (e.g. `"function"`).
    pub kind: String,
    /// Byte offset where the symbol's span begins.
    pub start_byte: usize,
    /// Byte offset where the symbol's span ends (exclusive).
    pub end_byte: usize,
    /// First line of the symbol, as reported by the extractor
    /// (fed to `line_to_byte`, which expects 1-based line numbers).
    pub start_line: usize,
    /// Last line of the symbol, as reported by the extractor.
    pub end_line: usize,
    /// Prefix applied to every non-empty line of text inserted at this
    /// location. `Editor::find_symbol` currently leaves this empty.
    pub indent: String,
}
27
/// Convert a 1-based line number to the byte offset at which that line starts.
///
/// Lines `0` and `1` both map to offset `0`. If `content` has fewer lines than
/// requested, the result is clamped to `content.len()` (the last line may not
/// have a trailing newline).
///
/// The returned offset is always a valid UTF-8 character boundary: it is either
/// `0`, `content.len()`, or the position immediately after a `\n`. With CRLF
/// line endings the `\r` stays with the preceding line.
pub fn line_to_byte(content: &str, line: usize) -> usize {
    if line <= 1 {
        return 0;
    }
    // Line N begins right after the (N - 1)-th newline; `nth` is zero-based,
    // hence `line - 2`.
    content
        .match_indices('\n')
        .nth(line - 2)
        .map_or(content.len(), |(newline_at, _)| newline_at + 1)
}
60
/// Editor for structural code modifications.
///
/// The type holds no state; it only groups the editing operations.
/// `Editor::default()` is equivalent to `Editor::new()`.
#[derive(Debug, Clone, Default)]
pub struct Editor {}
69
70impl Editor {
71    pub fn new() -> Self {
72        Self {}
73    }
74
75    /// Find a symbol by name in a file (uses skeleton extractor)
76    pub fn find_symbol(
77        &self,
78        path: &Path,
79        content: &str,
80        name: &str,
81        case_insensitive: bool,
82    ) -> Option<SymbolLocation> {
83        let extractor = Extractor::new();
84        let result = extractor.extract(path, content);
85
86        fn search_symbols(
87            symbols: &[Symbol],
88            name: &str,
89            content: &str,
90            case_insensitive: bool,
91        ) -> Option<SymbolLocation> {
92            for sym in symbols {
93                let matches = if case_insensitive {
94                    sym.name.eq_ignore_ascii_case(name)
95                } else {
96                    sym.name == name
97                };
98                if matches {
99                    let start_byte = line_to_byte(content, sym.start_line);
100                    let end_byte = line_to_byte(content, sym.end_line + 1);
101
102                    return Some(SymbolLocation {
103                        name: sym.name.clone(),
104                        kind: sym.kind.as_str().to_string(),
105                        start_byte,
106                        end_byte,
107                        start_line: sym.start_line,
108                        end_line: sym.end_line,
109                        indent: String::new(),
110                    });
111                }
112                // Search children
113                if let Some(loc) = search_symbols(&sym.children, name, content, case_insensitive) {
114                    return Some(loc);
115                }
116            }
117            None
118        }
119
120        search_symbols(&result.symbols, name, content, case_insensitive)
121    }
122
123    /// Delete a symbol from the content
124    pub fn delete_symbol(&self, content: &str, loc: &SymbolLocation) -> String {
125        let mut result = String::new();
126
127        // Find the start of the line containing the symbol
128        let line_start = content[..loc.start_byte]
129            .rfind('\n')
130            .map(|i| i + 1)
131            .unwrap_or(0);
132
133        // Find the end of the line containing the symbol end (include trailing newline)
134        let mut end_byte = loc.end_byte;
135        if end_byte < content.len() && content.as_bytes()[end_byte] == b'\n' {
136            end_byte += 1;
137        }
138
139        // Smart whitespace: consume trailing blank lines to avoid double-blanks
140        // But only if there's already a blank line before the symbol
141        let has_blank_before =
142            line_start >= 2 && &content[line_start.saturating_sub(2)..line_start] == "\n\n";
143
144        if has_blank_before {
145            // Consume trailing blank lines (up to one full blank line)
146            while end_byte < content.len() && content.as_bytes()[end_byte] == b'\n' {
147                end_byte += 1;
148                // Only consume one blank line worth
149                if end_byte < content.len() && content.as_bytes()[end_byte] != b'\n' {
150                    break;
151                }
152            }
153        }
154
155        result.push_str(&content[..line_start]);
156        result.push_str(&content[end_byte..]);
157
158        result
159    }
160
161    /// Replace a symbol with new content
162    pub fn replace_symbol(&self, content: &str, loc: &SymbolLocation, new_content: &str) -> String {
163        let mut result = String::new();
164
165        // Apply indentation to new content
166        let indented = self.apply_indent(new_content, &loc.indent);
167
168        result.push_str(&content[..loc.start_byte]);
169        result.push_str(&indented);
170        result.push_str(&content[loc.end_byte..]);
171
172        result
173    }
174
175    /// Count blank lines before a position
176    fn count_blank_lines_before(&self, content: &str, pos: usize) -> usize {
177        let mut count = 0usize;
178        let mut i = pos;
179        while i > 0 {
180            i -= 1;
181            if content.as_bytes()[i] == b'\n' {
182                count += 1;
183            } else if !content.as_bytes()[i].is_ascii_whitespace() {
184                break;
185            }
186        }
187        count.saturating_sub(1) // Don't count the newline ending the previous line
188    }
189
190    /// Count blank lines after a position (after any trailing newline)
191    fn count_blank_lines_after(&self, content: &str, pos: usize) -> usize {
192        let mut count = 0;
193        let mut i = pos;
194        // Skip past the first newline (end of current symbol)
195        if i < content.len() && content.as_bytes()[i] == b'\n' {
196            i += 1;
197        }
198        while i < content.len() {
199            if content.as_bytes()[i] == b'\n' {
200                count += 1;
201                i += 1;
202            } else if content.as_bytes()[i].is_ascii_whitespace() {
203                i += 1;
204            } else {
205                break;
206            }
207        }
208        count
209    }
210
211    /// Insert content before a symbol
212    pub fn insert_before(&self, content: &str, loc: &SymbolLocation, new_content: &str) -> String {
213        let mut result = String::new();
214
215        // Find the start of the line containing the symbol
216        let line_start = content[..loc.start_byte]
217            .rfind('\n')
218            .map(|i| i + 1)
219            .unwrap_or(0);
220
221        // Detect spacing convention: how many blank lines before this symbol?
222        let blank_lines = self.count_blank_lines_before(content, line_start);
223        // +1 for the newline ending the content, +N for N blank lines
224        let spacing = "\n".repeat(blank_lines.max(1) + 1);
225
226        // Apply indentation to new content
227        let indented = self.apply_indent(new_content, &loc.indent);
228
229        result.push_str(&content[..line_start]);
230        result.push_str(&indented);
231        result.push_str(&spacing);
232        result.push_str(&content[line_start..]);
233
234        result
235    }
236
237    /// Insert content after a symbol
238    pub fn insert_after(&self, content: &str, loc: &SymbolLocation, new_content: &str) -> String {
239        let mut result = String::new();
240
241        // Apply indentation to new content
242        let indented = self.apply_indent(new_content, &loc.indent);
243
244        // Find the end of the symbol (include trailing newline)
245        let end_pos = if loc.end_byte < content.len() && content.as_bytes()[loc.end_byte] == b'\n' {
246            loc.end_byte + 1
247        } else {
248            loc.end_byte
249        };
250
251        // Detect spacing convention: how many blank lines after this symbol?
252        let blank_lines = self.count_blank_lines_after(content, loc.end_byte);
253        // end_pos already includes trailing newline, so just add N newlines for N blank lines
254        let spacing = "\n".repeat(blank_lines.max(1));
255
256        // Find where the next non-blank content starts
257        let mut next_content_pos = end_pos;
258        while next_content_pos < content.len() && content.as_bytes()[next_content_pos] == b'\n' {
259            next_content_pos += 1;
260        }
261
262        result.push_str(&content[..end_pos]);
263        result.push_str(&spacing);
264        result.push_str(&indented);
265
266        if next_content_pos < content.len() {
267            // +1 for the newline ending the inserted content
268            result.push_str(&"\n".repeat(blank_lines.max(1) + 1));
269            result.push_str(&content[next_content_pos..]);
270        } else {
271            result.push('\n');
272        }
273
274        result
275    }
276
277    /// Insert content at the beginning of a file
278    pub fn prepend_to_file(&self, content: &str, new_content: &str) -> String {
279        let mut result = String::new();
280        result.push_str(new_content);
281        if !new_content.ends_with('\n') {
282            result.push('\n');
283        }
284        result.push_str(content);
285        result
286    }
287
288    /// Insert content at the end of a file
289    pub fn append_to_file(&self, content: &str, new_content: &str) -> String {
290        let mut result = String::new();
291        result.push_str(content);
292        if !content.ends_with('\n') {
293            result.push('\n');
294        }
295        result.push_str(new_content);
296        if !new_content.ends_with('\n') {
297            result.push('\n');
298        }
299        result
300    }
301
302    /// Find the body of a container symbol (class, impl block, markdown section) for prepend/append
303    pub fn find_container_body(
304        &self,
305        path: &Path,
306        content: &str,
307        name: &str,
308    ) -> Option<ContainerBody> {
309        let support = support_for_path(path)?;
310        let grammar = support.grammar_name();
311        let tree = parse_with_grammar(grammar, content)?;
312
313        // Use the tags query to locate container nodes by name.
314        let loader = grammar_loader();
315        let tags_scm = loader.get_tags(grammar)?;
316        let ts_lang = loader.get(grammar).ok()?;
317        let tags_query = tree_sitter::Query::new(&ts_lang, &tags_scm).ok()?;
318        find_container_body_via_tags(&tree, &tags_query, content, name, support)
319    }
320
321    /// Prepend content inside a container (class/impl body)
322    pub fn prepend_to_container(
323        &self,
324        content: &str,
325        body: &ContainerBody,
326        new_content: &str,
327    ) -> String {
328        let mut result = String::new();
329
330        // Apply indentation to new content
331        let indented = self.apply_indent(new_content, &body.inner_indent);
332
333        result.push_str(&content[..body.content_start]);
334
335        // Add the new content
336        result.push_str(&indented);
337        result.push('\n');
338
339        // Add spacing if there's existing content
340        if !body.is_empty {
341            result.push('\n');
342        }
343
344        result.push_str(&content[body.content_start..]);
345
346        result
347    }
348
349    /// Append content inside a container (class/impl body)
350    pub fn append_to_container(
351        &self,
352        content: &str,
353        body: &ContainerBody,
354        new_content: &str,
355    ) -> String {
356        let mut result = String::new();
357
358        // Apply indentation to new content
359        let indented = self.apply_indent(new_content, &body.inner_indent);
360
361        // Trim trailing whitespace/newlines from existing content
362        let mut end_pos = body.content_end;
363        while end_pos > 0
364            && content
365                .as_bytes()
366                .get(end_pos - 1)
367                .map(|&b| b == b'\n' || b == b' ')
368                == Some(true)
369        {
370            end_pos -= 1;
371        }
372
373        result.push_str(&content[..end_pos]);
374
375        // Add blank line before new content (Python/Rust convention for methods)
376        if !body.is_empty {
377            result.push_str("\n\n");
378        } else {
379            result.push('\n');
380        }
381
382        // Add the new content
383        result.push_str(&indented);
384        result.push('\n');
385
386        result.push_str(&content[body.content_end..]);
387
388        result
389    }
390
391    /// Rename all word-boundary occurrences of `old_name` on a specific line (1-based).
392    ///
393    /// Replaces every whole-word occurrence of `old_name` on that line with `new_name`.
394    /// Returns `None` if the line number is out of range or if `old_name` does not
395    /// appear as a whole word anywhere on that line.
396    pub fn rename_identifier_in_line(
397        &self,
398        content: &str,
399        line_no: usize,
400        old_name: &str,
401        new_name: &str,
402    ) -> Option<String> {
403        let (line_start, line_end) = line_byte_range(content, line_no)?;
404        let line = &content[line_start..line_end];
405        let new_line = replace_all_words(line, old_name, new_name);
406        if new_line == line {
407            return None;
408        }
409        let mut result = String::with_capacity(content.len() + new_name.len() * 4);
410        result.push_str(&content[..line_start]);
411        result.push_str(&new_line);
412        result.push_str(&content[line_end..]);
413        Some(result)
414    }
415
416    /// Apply indentation to content
417    pub fn apply_indent(&self, content: &str, indent: &str) -> String {
418        content
419            .lines()
420            .enumerate()
421            .map(|(i, line)| {
422                if i == 0 {
423                    format!("{}{}", indent, line)
424                } else if line.is_empty() {
425                    line.to_string()
426                } else {
427                    format!("{}{}", indent, line)
428                }
429            })
430            .collect::<Vec<_>>()
431            .join("\n")
432    }
433}
434
435/// Returns the byte range [start, end) of the Nth (1-based) line in `content`,
436/// not including the trailing newline. Returns `None` if `line_no` is out of range.
437fn line_byte_range(content: &str, line_no: usize) -> Option<(usize, usize)> {
438    if line_no == 0 {
439        return None;
440    }
441    let mut start = 0usize;
442    let mut current_line = 1usize;
443    for (i, c) in content.char_indices() {
444        if current_line == line_no {
445            // start is set; find end
446            let end = content[i..]
447                .find('\n')
448                .map(|n| i + n)
449                .unwrap_or(content.len());
450            return Some((start, end));
451        }
452        if c == '\n' {
453            current_line += 1;
454            start = i + 1;
455        }
456    }
457    // Handle single-line file with no newline
458    if current_line == line_no {
459        Some((start, content.len()))
460    } else {
461        None
462    }
463}
464
/// Replace all whole-word occurrences of `old` in `text` with `new_word`.
///
/// An occurrence counts as a whole word when the characters directly before
/// and after it (if any) are neither `_` nor alphanumeric. The check is
/// Unicode-aware, so `foo` inside `éfoo` or `fooé` is *not* treated as a
/// separate word. Matches are found left to right and never overlap.
///
/// Returns the original string unchanged if no occurrences are found or if
/// `old` is empty.
pub fn replace_all_words(text: &str, old: &str, new_word: &str) -> String {
    /// Characters that can be part of an identifier-like word.
    fn is_word_char(c: char) -> bool {
        c == '_' || c.is_alphanumeric()
    }

    if old.is_empty() {
        return text.to_string();
    }

    let mut result = String::with_capacity(text.len());
    let mut offset = 0;
    while let Some(pos) = text[offset..].find(old) {
        let start = offset + pos;
        let end = start + old.len();

        // Look at whole characters, not single bytes, on either side.
        let before_ok = !text[..start]
            .chars()
            .next_back()
            .map_or(false, is_word_char);
        let after_ok = !text[end..].chars().next().map_or(false, is_word_char);

        if before_ok && after_ok {
            result.push_str(&text[offset..start]);
            result.push_str(new_word);
            offset = end;
        } else {
            // Not a word boundary — copy one char and keep searching.
            let step = text[start..].chars().next().map_or(1, char::len_utf8);
            result.push_str(&text[offset..start + step]);
            offset = start + step;
        }
    }
    result.push_str(&text[offset..]);
    result
}
510
511/// Find a container body using a tags query.
512///
513/// Used when the language has a `*.tags.scm`.
514/// Runs the tags query to find `@definition.class`, `@definition.module`, or
515/// `@definition.interface` nodes whose name matches `name`, then delegates to
516/// the Language trait's `container_body` / `analyze_container_body` methods.
517fn find_container_body_via_tags(
518    tree: &tree_sitter::Tree,
519    tags_query: &tree_sitter::Query,
520    content: &str,
521    name: &str,
522    support: &dyn Language,
523) -> Option<ContainerBody> {
524    let capture_names = tags_query.capture_names();
525
526    let root = tree.root_node();
527    let mut qcursor = tree_sitter::QueryCursor::new();
528    let mut matches = qcursor.matches(tags_query, root, content.as_bytes());
529
530    while let Some(m) = matches.next() {
531        for capture in m.captures {
532            let cn = capture_names[capture.index as usize];
533            if !matches!(
534                cn,
535                "definition.class" | "definition.module" | "definition.interface"
536            ) {
537                continue;
538            }
539            let node = capture.node;
540            let container_name = support.node_name(&node, content)?;
541            if container_name != name {
542                continue;
543            }
544            let body_node = support.container_body(&node)?;
545            let start_byte = node.start_byte();
546            let line_start = content[..start_byte]
547                .rfind('\n')
548                .map(|i| i + 1)
549                .unwrap_or(0);
550            let container_indent: String = content[line_start..start_byte]
551                .chars()
552                .take_while(|c| c.is_whitespace())
553                .collect();
554            let inner_indent = format!("{}    ", container_indent);
555            if let Some(body) = support.analyze_container_body(&body_node, content, &inner_indent) {
556                return Some(body);
557            }
558        }
559    }
560
561    None
562}
563
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Two top-level Python functions separated by one blank line.
    const PY_TWO_FUNCS: &str = "def foo():\n    pass\n\ndef bar():\n    return 42\n";

    /// A Rust impl block with a single method.
    const RUST_IMPL: &str = "impl Foo {\n    fn first(&self) -> i32 {\n        1\n    }\n}\n";

    /// Byte offset of the first occurrence of `needle`; panics if absent.
    fn offset_of(haystack: &str, needle: &str) -> usize {
        haystack.find(needle).unwrap()
    }

    /// Look up `name` in Python source, panicking when it is missing.
    fn locate_py(editor: &Editor, source: &str, name: &str) -> SymbolLocation {
        editor
            .find_symbol(&PathBuf::from("test.py"), source, name, false)
            .unwrap()
    }

    #[test]
    fn test_find_python_function() {
        let editor = Editor::new();
        let source = "\ndef foo():\n    pass\n\ndef bar():\n    return 42\n";
        let found = editor.find_symbol(&PathBuf::from("test.py"), source, "bar", false);
        assert!(found.is_some());
        let found = found.unwrap();
        assert_eq!(found.name, "bar");
        assert_eq!(found.kind, "function");
    }

    #[test]
    fn test_delete_symbol() {
        let editor = Editor::new();
        let target = locate_py(&editor, PY_TWO_FUNCS, "bar");
        let edited = editor.delete_symbol(PY_TWO_FUNCS, &target);
        assert!(!edited.contains("bar"));
        assert!(edited.contains("foo"));
    }

    #[test]
    fn test_insert_before() {
        let editor = Editor::new();
        let target = locate_py(&editor, PY_TWO_FUNCS, "bar");
        let edited = editor.insert_before(PY_TWO_FUNCS, &target, "def baz():\n    pass");
        assert!(edited.contains("baz"));
        assert!(offset_of(&edited, "baz") < offset_of(&edited, "bar"));
    }

    #[test]
    fn test_prepend_to_python_class() {
        let editor = Editor::new();
        let source = r#"class Foo:
    """Docstring."""

    def first(self):
        pass
"#;
        let body = editor
            .find_container_body(&PathBuf::from("test.py"), source, "Foo")
            .unwrap();
        let edited =
            editor.prepend_to_container(source, &body, "def new_method(self):\n    return 1");
        // The new method belongs after the docstring but ahead of `first`.
        assert!(edited.contains("new_method"));
        let docstring_at = offset_of(&edited, "Docstring");
        let new_method_at = offset_of(&edited, "new_method");
        let first_at = offset_of(&edited, "first");
        assert!(docstring_at < new_method_at);
        assert!(new_method_at < first_at);
    }

    #[test]
    fn test_append_to_python_class() {
        let editor = Editor::new();
        let source = r#"class Foo:
    def first(self):
        pass

    def second(self):
        return 42
"#;
        let body = editor
            .find_container_body(&PathBuf::from("test.py"), source, "Foo")
            .unwrap();
        let edited = editor.append_to_container(source, &body, "def last(self):\n    return 99");
        // The new method belongs after `second`.
        assert!(edited.contains("last"));
        assert!(offset_of(&edited, "second") < offset_of(&edited, "last"));
    }

    #[test]
    fn test_prepend_to_rust_impl() {
        let editor = Editor::new();
        let body = editor
            .find_container_body(&PathBuf::from("test.rs"), RUST_IMPL, "Foo")
            .unwrap();
        let edited =
            editor.prepend_to_container(RUST_IMPL, &body, "fn new() -> Self {\n    Self {}\n}");
        assert!(edited.contains("new"));
        assert!(offset_of(&edited, "new") < offset_of(&edited, "first"));
    }

    #[test]
    fn test_append_to_rust_impl() {
        let editor = Editor::new();
        let body = editor
            .find_container_body(&PathBuf::from("test.rs"), RUST_IMPL, "Foo")
            .unwrap();
        let edited =
            editor.append_to_container(RUST_IMPL, &body, "fn last(&self) -> i32 {\n    99\n}");
        assert!(edited.contains("last"));
        assert!(offset_of(&edited, "first") < offset_of(&edited, "last"));
        // The impl's closing brace must survive the edit.
        assert!(edited.contains("}"));
    }
}