Skip to main content

llmwiki_tooling/
splice.rs

1use std::ops::Range;
2
/// Apply non-overlapping byte-range replacements to a source string.
///
/// `edits` may be given in any order. Each entry replaces the bytes of
/// `source` covered by its range with the accompanying string; an empty range
/// (`n..n`) acts as a pure insertion at offset `n`.
///
/// Replacements are applied back-to-front (highest start offset first) so that
/// the offsets of edits nearer the beginning of `source` remain valid while
/// text after them is being rewritten. When two edits share a start offset,
/// the one with the larger end is applied first, so an insertion placed at the
/// start of a replaced span ends up in front of the replacement instead of
/// being overwritten by it.
///
/// # Panics
///
/// Panics if a range is out of bounds for the text it is applied to or does
/// not lie on UTF-8 character boundaries (see [`String::replace_range`]).
/// Overlapping ranges are not detected; their result is unspecified.
pub fn apply(source: &str, edits: &[(Range<usize>, String)]) -> String {
    let mut ordered: Vec<&(Range<usize>, String)> = edits.iter().collect();
    // Descending by start; ties broken by descending end so that a zero-width
    // insertion at offset `n` is applied *after* a replacement starting at `n`.
    // The sort is stable, so fully identical ranges keep their input order.
    ordered.sort_by(|a, b| {
        b.0.start
            .cmp(&a.0.start)
            .then_with(|| b.0.end.cmp(&a.0.end))
    });

    let mut result = source.to_owned();
    for (range, replacement) in ordered {
        result.replace_range(range.clone(), replacement);
    }
    result
}
17
18/// Compute a unified-diff-style display of planned edits.
19///
20/// Each edit is shown with its surrounding context (the line containing the edit).
21pub fn diff(source: &str, path: &std::path::Path, edits: &[(Range<usize>, String)]) -> String {
22    if edits.is_empty() {
23        return String::new();
24    }
25
26    let line_offsets = compute_line_offsets(source);
27    let mut sorted: Vec<_> = edits.iter().collect();
28    sorted.sort_by_key(|(range, _)| range.start);
29
30    let mut output = format!("--- {}\n+++ {}\n", path.display(), path.display());
31
32    for (range, replacement) in &sorted {
33        let line_num = offset_to_line(&line_offsets, range.start);
34        let line_start = line_offsets[line_num];
35        let line_end = line_offsets
36            .get(line_num + 1)
37            .copied()
38            .unwrap_or(source.len());
39        let original_line = &source[line_start..line_end];
40
41        // Build the modified line
42        let prefix = &source[line_start..range.start];
43        let suffix = &source[range.end..line_end];
44        let modified_line = format!("{prefix}{replacement}{suffix}");
45
46        output.push_str(&format!("@@ -{} +{} @@\n", line_num + 1, line_num + 1));
47        output.push_str(&format!("-{original_line}"));
48        if !original_line.ends_with('\n') {
49            output.push('\n');
50        }
51        output.push_str(&format!("+{modified_line}"));
52        if !modified_line.ends_with('\n') {
53            output.push('\n');
54        }
55    }
56
57    output
58}
59
/// Return the byte offset at which every line of `source` begins.
///
/// The first entry is always `0`; each `'\n'` contributes the offset of the
/// byte immediately after it. The result is therefore never empty, and a
/// source ending in a newline yields a final entry equal to `source.len()`.
pub fn compute_line_offsets(source: &str) -> Vec<usize> {
    let after_each_newline = source.match_indices('\n').map(|(pos, _)| pos + 1);
    std::iter::once(0).chain(after_each_newline).collect()
}
70
/// Map a byte offset to the 0-based index of the line that contains it.
///
/// `line_offsets` is searched with a binary search, so it must be sorted in
/// ascending order. An exact hit on a line start returns that line;
/// otherwise the result is the line whose start precedes `offset`
/// (clamped to `0`).
pub fn offset_to_line(line_offsets: &[usize], offset: usize) -> usize {
    line_offsets
        .binary_search(&offset)
        // A miss reports where `offset` would be inserted, which is one past
        // the line it actually falls in.
        .unwrap_or_else(|insert_at| insert_at.saturating_sub(1))
}
78
79/// Convert a byte offset to (1-based line, 1-based column).
80pub fn offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
81    let line_offsets = compute_line_offsets(source);
82    let line = offset_to_line(&line_offsets, offset);
83    let col = offset - line_offsets[line];
84    (line + 1, col + 1)
85}
86
#[cfg(test)]
mod tests {
    use super::*;

    /// Build one `(range, replacement)` edit from a string slice.
    fn edit(range: std::ops::Range<usize>, text: &str) -> (std::ops::Range<usize>, String) {
        (range, text.to_owned())
    }

    // --- apply -----------------------------------------------------------

    #[test]
    fn apply_single_replacement() {
        let edits = [edit(6..10, "[[GRPO]]")];
        assert_eq!(apply("hello GRPO world", &edits), "hello [[GRPO]] world");
    }

    #[test]
    fn apply_multiple_non_overlapping() {
        let edits = [edit(0..3, "[[DPO]]"), edit(8..12, "[[GRPO]]")];
        let rewritten = apply("DPO and GRPO are methods", &edits);
        assert_eq!(rewritten, "[[DPO]] and [[GRPO]] are methods");
    }

    #[test]
    fn apply_preserves_surrounding_text() {
        let edits = [edit(7..11, "[[RLHF]]")];
        assert_eq!(apply("before RLHF after", &edits), "before [[RLHF]] after");
    }

    // --- offset_to_line_col ----------------------------------------------

    #[test]
    fn offset_to_line_col_first_line() {
        let source = "hello world";
        for (offset, expected) in [(0, (1, 1)), (6, (1, 7))] {
            assert_eq!(offset_to_line_col(source, offset), expected);
        }
    }

    #[test]
    fn offset_to_line_col_multiline() {
        let source = "line one\nline two\nline three";
        // Offsets 9 and 18 are the first bytes of lines two and three.
        let cases = [(9, (2, 1)), (14, (2, 6)), (18, (3, 1))];
        for (offset, expected) in cases {
            assert_eq!(offset_to_line_col(source, offset), expected);
        }
    }
}