Skip to main content

skilllite_fs/
search_replace.rs

1//! search_replace 与 insert_lines:精确/模糊替换、行插入
2
3use anyhow::Result;
4
5/// 纯内存:精确 search_replace
6pub fn apply_search_replace(
7    content: &str,
8    old_string: &str,
9    new_string: &str,
10    replace_all: bool,
11) -> Result<(String, usize)> {
12    if old_string.is_empty() {
13        anyhow::bail!("old_string cannot be empty");
14    }
15    let count = content.matches(old_string).count();
16    if count == 0 {
17        anyhow::bail!("old_string not found in content");
18    }
19    let new_content = if replace_all {
20        content.replace(old_string, new_string)
21    } else {
22        content.replacen(old_string, new_string, 1)
23    };
24    let replaced = if replace_all { count } else { 1 };
25    Ok((new_content, replaced))
26}
27
/// Outcome of a replace operation that may have used fuzzy matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FuzzyReplaceResult {
    /// How the match was found: `"exact"`, or the label of the fuzzy strategy
    /// that located it (e.g. `"whitespace_fuzzy"`, `"blank_line_fuzzy"`,
    /// `"similarity(0.91)"`).
    pub match_type: String,
    /// Number of matches found in the original content.
    pub total_occurrences: usize,
    /// Number of matches that were actually replaced.
    pub replaced_count: usize,
    /// Byte offset of the first match in the original content.
    pub first_match_start: usize,
    /// Byte length of the first matched span in the original content.
    pub first_match_len: usize,
    /// Full content after the replacement has been applied.
    pub new_content: String,
}
38
39/// 精确或模糊替换(单次替换时启用 fuzzy fallback)
40pub fn apply_replace_fuzzy(
41    content: &str,
42    old_string: &str,
43    new_string: &str,
44    replace_all: bool,
45) -> Result<FuzzyReplaceResult> {
46    if old_string.is_empty() {
47        anyhow::bail!("old_string cannot be empty");
48    }
49    let exact_count = content.matches(old_string).count();
50    if exact_count > 0 {
51        if !replace_all && exact_count > 1 {
52            anyhow::bail!(
53                "Found {} occurrences of old_string. search_replace requires a unique match by default; add more context or set replace_all=true.",
54                exact_count
55            );
56        }
57        let first_start = content.find(old_string).unwrap_or(0);
58        let new_content = if replace_all {
59            content.replace(old_string, new_string)
60        } else {
61            content.replacen(old_string, new_string, 1)
62        };
63        return Ok(FuzzyReplaceResult {
64            match_type: "exact".to_string(),
65            total_occurrences: exact_count,
66            replaced_count: if replace_all { exact_count } else { 1 },
67            first_match_start: first_start,
68            first_match_len: old_string.len(),
69            new_content,
70        });
71    }
72    if replace_all {
73        anyhow::bail!("old_string not found in content");
74    }
75    match fuzzy_find(content, old_string) {
76        Some(fm) => {
77            let new_content = format!(
78                "{}{}{}",
79                &content[..fm.start],
80                new_string,
81                &content[fm.end..],
82            );
83            Ok(FuzzyReplaceResult {
84                match_type: fm.match_type,
85                total_occurrences: 1,
86                replaced_count: 1,
87                first_match_start: fm.start,
88                first_match_len: fm.end - fm.start,
89                new_content,
90            })
91        }
92        None => {
93            let hint = build_failure_hint(content, old_string);
94            anyhow::bail!(
95                "old_string not found in file (tried exact + fuzzy matching).\n\n{}\n\nTip: Copy the exact text from above into old_string, or use insert_lines with line number.",
96                hint
97            )
98        }
99    }
100}
101
102/// normalize_whitespace 模式:忽略行尾空白
103pub fn apply_replace_normalize_whitespace(
104    content: &str,
105    old_string: &str,
106    new_string: &str,
107    replace_all: bool,
108) -> Result<FuzzyReplaceResult> {
109    let escaped = regex::escape(old_string);
110    let pattern = format!(r"({})([ \t]*)(\r?\n|$)", escaped);
111    let re = regex::Regex::new(&pattern).map_err(|e| anyhow::anyhow!("Invalid regex: {}", e))?;
112    let matches: Vec<_> = re.find_iter(content).collect();
113    let count = matches.len();
114    if count == 0 {
115        anyhow::bail!("old_string not found (with normalize_whitespace)");
116    }
117    if !replace_all && count > 1 {
118        anyhow::bail!(
119            "Found {} occurrences. Add more context or set replace_all=true.",
120            count
121        );
122    }
123    let first = matches[0];
124    let new_content = if replace_all {
125        re.replace_all(content, |caps: &regex::Captures| {
126            let newline = caps.get(3).map_or("", |m| m.as_str());
127            format!("{}{}", new_string, newline)
128        })
129        .into_owned()
130    } else {
131        re.replacen(content, 1, |caps: &regex::Captures| {
132            let newline = caps.get(3).map_or("", |m| m.as_str());
133            format!("{}{}", new_string, newline)
134        })
135        .into_owned()
136    };
137    Ok(FuzzyReplaceResult {
138        match_type: "exact".to_string(),
139        total_occurrences: count,
140        replaced_count: if replace_all { count } else { 1 },
141        first_match_start: first.start(),
142        first_match_len: first.end() - first.start(),
143        new_content,
144    })
145}
146
147/// 在指定行后插入内容,支持 auto-indent
148pub fn insert_lines_at(content: &str, line_num: usize, insert_content: &str) -> Result<String> {
149    let lines: Vec<&str> = content.lines().collect();
150    let total = lines.len();
151    if line_num > total {
152        anyhow::bail!("Line {} is beyond end of file ({} lines)", line_num, total);
153    }
154    let offsets = line_byte_offsets(content);
155    let insert_at = if line_num == 0 {
156        0
157    } else {
158        offsets.get(line_num).copied().unwrap_or(content.len())
159    };
160    let needs_preceding_newline = line_num > 0
161        && insert_at == content.len()
162        && !content.is_empty()
163        && !content.ends_with('\n');
164    let indented = auto_indent(insert_content, &lines, line_num);
165    let effective = indented.as_deref().unwrap_or(insert_content);
166    let with_newline = if effective.ends_with('\n') {
167        effective.to_string()
168    } else {
169        format!("{}\n", effective)
170    };
171    let new_content = if needs_preceding_newline {
172        format!(
173            "{}\n{}{}",
174            &content[..insert_at],
175            with_newline,
176            &content[insert_at..]
177        )
178    } else {
179        format!(
180            "{}{}{}",
181            &content[..insert_at],
182            with_newline,
183            &content[insert_at..]
184        )
185    };
186    Ok(new_content)
187}
188
/// Byte offsets at which each line of `content` starts.
///
/// The first entry is always `0`; every `\n` contributes the offset of the
/// byte that follows it, so text ending in a newline yields a final entry
/// equal to `content.len()`.
pub fn line_byte_offsets(content: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(content.match_indices('\n').map(|(pos, _)| pos + 1))
        .collect()
}
199
200/// 安全截取片段(避免在字符中间切断)
201pub fn safe_excerpt(content: &str, start: usize, span_len: usize, max_len: usize) -> String {
202    let prefix = 80usize;
203    let suffix = 80usize;
204    let begin = floor_char_boundary(content, start.saturating_sub(prefix));
205    let end = ceil_char_boundary(content, (start + span_len + suffix).min(content.len()));
206    let mut excerpt = content[begin..end].replace('\n', "\\n");
207    if excerpt.len() > max_len {
208        let safe_len = floor_char_boundary(&excerpt, max_len);
209        excerpt.truncate(safe_len);
210        excerpt.push_str("...");
211    }
212    excerpt
213}
214
/// Largest character boundary in `s` that is `<= idx` (with `idx` clamped to
/// `s.len()`).
fn floor_char_boundary(s: &str, idx: usize) -> usize {
    let capped = idx.min(s.len());
    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=capped)
        .rev()
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(0)
}
222
/// Smallest character boundary in `s` that is `>= idx` (with `idx` clamped to
/// `s.len()`).
fn ceil_char_boundary(s: &str, idx: usize) -> usize {
    let len = s.len();
    // `len` itself is always a boundary and serves as the fallback.
    (idx.min(len)..len)
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(len)
}
230
231pub fn build_failure_hint(content: &str, old_string: &str) -> String {
232    let old_lines: Vec<&str> = old_string.lines().collect();
233    if old_lines.is_empty() || content.is_empty() {
234        return "File is empty or old_string is empty.".to_string();
235    }
236    let content_lines: Vec<&str> = content.lines().collect();
237    if content_lines.is_empty() {
238        return "File is empty.".to_string();
239    }
240    let mut best_score = 0.0_f64;
241    let mut best_pos = 0_usize;
242    let window = old_lines.len().min(content_lines.len());
243    for i in 0..=(content_lines.len().saturating_sub(window)) {
244        let mut total = 0.0;
245        for j in 0..window {
246            total += levenshtein_similarity(
247                old_lines.get(j).unwrap_or(&"").trim(),
248                content_lines[i + j].trim(),
249            );
250        }
251        let avg = total / window as f64;
252        if avg > best_score {
253            best_score = avg;
254            best_pos = i;
255        }
256    }
257    let ctx = 5;
258    let start = best_pos.saturating_sub(ctx);
259    let end = (best_pos + window + ctx).min(content_lines.len());
260    let mut hint = format!(
261        "Closest match found at lines {}-{} (similarity: {:.2}):\n",
262        best_pos + 1,
263        best_pos + window,
264        best_score
265    );
266    for (i, line) in content_lines.iter().enumerate().take(end).skip(start) {
267        hint.push_str(&format!("{:>6}|{}\n", i + 1, line));
268    }
269    hint
270}
271
272fn auto_indent(content: &str, lines: &[&str], after_line: usize) -> Option<String> {
273    let ref_line = if after_line < lines.len() {
274        lines[after_line]
275    } else if after_line > 0 {
276        lines[after_line - 1]
277    } else if !lines.is_empty() {
278        lines[0]
279    } else {
280        return None;
281    };
282    let indent = detect_indentation(ref_line);
283    if indent.is_empty() {
284        return None;
285    }
286    let has_indent = content
287        .lines()
288        .filter(|l| !l.trim().is_empty())
289        .any(|l| l.starts_with(' ') || l.starts_with('\t'));
290    if has_indent {
291        return None;
292    }
293    let indented: Vec<String> = content
294        .lines()
295        .map(|l| {
296            if l.trim().is_empty() {
297                l.to_string()
298            } else {
299                format!("{}{}", indent, l)
300            }
301        })
302        .collect();
303    Some(indented.join("\n"))
304}
305
/// Returns the leading whitespace of `line` (the whole line if it is blank).
fn detect_indentation(line: &str) -> &str {
    let body_start = line
        .find(|c: char| !c.is_whitespace())
        .unwrap_or(line.len());
    &line[..body_start]
}
310
311// ─── Fuzzy match ───────────────────────────────────────────────────────────
312
/// A span of the content located by one of the fuzzy matchers.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FuzzyMatch {
    /// Byte offset where the matched span starts.
    start: usize,
    /// Byte offset just past the end of the matched span.
    end: usize,
    /// Label of the strategy that produced the match.
    match_type: String,
}
318
319const FUZZY_THRESHOLD: f64 = 0.85;
320
321fn fuzzy_find(content: &str, old_string: &str) -> Option<FuzzyMatch> {
322    fuzzy_find_whitespace(content, old_string)
323        .or_else(|| fuzzy_find_blank_lines(content, old_string))
324        .or_else(|| {
325            let threshold = std::env::var("SKILLLITE_FUZZY_THRESHOLD")
326                .ok()
327                .and_then(|v| v.parse::<f64>().ok())
328                .unwrap_or(FUZZY_THRESHOLD);
329            fuzzy_find_similarity(content, old_string, threshold)
330        })
331}
332
333fn fuzzy_find_whitespace(content: &str, old_string: &str) -> Option<FuzzyMatch> {
334    let old_lines: Vec<&str> = old_string.lines().collect();
335    if old_lines.is_empty() {
336        return None;
337    }
338    let content_lines: Vec<&str> = content.lines().collect();
339    if content_lines.len() < old_lines.len() {
340        return None;
341    }
342    let trimmed_old: Vec<&str> = old_lines.iter().map(|l| l.trim()).collect();
343    if trimmed_old.iter().all(|l| l.is_empty()) {
344        return None;
345    }
346    let offsets = line_byte_offsets(content);
347    for i in 0..=(content_lines.len() - old_lines.len()) {
348        let all_match = (0..old_lines.len()).all(|j| content_lines[i + j].trim() == trimmed_old[j]);
349        if all_match {
350            let start = offsets[i];
351            let end = fuzzy_match_end(
352                content,
353                &offsets,
354                &content_lines,
355                i,
356                old_lines.len(),
357                old_string.ends_with('\n'),
358            );
359            return Some(FuzzyMatch {
360                start,
361                end,
362                match_type: "whitespace_fuzzy".to_string(),
363            });
364        }
365    }
366    None
367}
368
369fn fuzzy_find_blank_lines(content: &str, old_string: &str) -> Option<FuzzyMatch> {
370    let old_non_blank: Vec<&str> = old_string
371        .lines()
372        .filter(|l| !l.trim().is_empty())
373        .collect();
374    if old_non_blank.is_empty() {
375        return None;
376    }
377    let content_lines: Vec<&str> = content.lines().collect();
378    let offsets = line_byte_offsets(content);
379    for start_line in 0..content_lines.len() {
380        if content_lines[start_line].trim().is_empty() {
381            continue;
382        }
383        let mut old_idx = 0;
384        let mut last_matched = start_line;
385        for (i, line) in content_lines.iter().enumerate().skip(start_line) {
386            if line.trim().is_empty() {
387                continue;
388            }
389            if old_idx < old_non_blank.len() && *line == old_non_blank[old_idx] {
390                old_idx += 1;
391                last_matched = i;
392            } else {
393                break;
394            }
395        }
396        if old_idx == old_non_blank.len() {
397            let start = offsets[start_line];
398            let end = fuzzy_match_end(
399                content,
400                &offsets,
401                &content_lines,
402                last_matched,
403                1,
404                old_string.ends_with('\n'),
405            );
406            return Some(FuzzyMatch {
407                start,
408                end,
409                match_type: "blank_line_fuzzy".to_string(),
410            });
411        }
412    }
413    None
414}
415
416fn fuzzy_find_similarity(content: &str, old_string: &str, threshold: f64) -> Option<FuzzyMatch> {
417    let old_lines: Vec<&str> = old_string.lines().collect();
418    if old_lines.is_empty() {
419        return None;
420    }
421    let content_lines: Vec<&str> = content.lines().collect();
422    if content_lines.len() < old_lines.len() {
423        return None;
424    }
425    let offsets = line_byte_offsets(content);
426    let mut best_score = 0.0_f64;
427    let mut best_pos = 0_usize;
428    for i in 0..=(content_lines.len() - old_lines.len()) {
429        let mut total = 0.0;
430        for j in 0..old_lines.len() {
431            total += levenshtein_similarity(old_lines[j].trim(), content_lines[i + j].trim());
432        }
433        let avg = total / old_lines.len() as f64;
434        if avg > best_score {
435            best_score = avg;
436            best_pos = i;
437        }
438    }
439    if best_score >= threshold {
440        let start = offsets[best_pos];
441        let end = fuzzy_match_end(
442            content,
443            &offsets,
444            &content_lines,
445            best_pos,
446            old_lines.len(),
447            old_string.ends_with('\n'),
448        );
449        Some(FuzzyMatch {
450            start,
451            end,
452            match_type: format!("similarity({:.2})", best_score),
453        })
454    } else {
455        None
456    }
457}
458
/// Byte offset at which a line-based match ends.
///
/// The match covers `num_lines` lines starting at line index `start_line`.
/// When the search text ended with a newline, the match extends to the start
/// of the following line (or the end of `content` if there is none), so the
/// line terminator is included. Otherwise it stops right after the text of
/// the last matched line, leaving that line's terminator in place.
fn fuzzy_match_end(
    content: &str,
    offsets: &[usize],
    content_lines: &[&str],
    start_line: usize,
    num_lines: usize,
    old_ends_with_newline: bool,
) -> usize {
    let past_last = start_line + num_lines;
    if !old_ends_with_newline {
        let last = past_last - 1;
        return content
            .len()
            .min(offsets[last] + content_lines[last].len());
    }
    match offsets.get(past_last) {
        Some(&next_line_start) => next_line_start,
        None => content.len(),
    }
}
475
/// Normalized similarity of two strings, from `0.0` (nothing in common) to
/// `1.0` (identical).
///
/// Computed as `1 - distance / max_len`, where `distance` is the Levenshtein
/// distance and `max_len` is the length of the longer string. Both are
/// measured in `char`s: normalising a char-based distance by byte length
/// would overstate the similarity of non-ASCII text (three completely
/// different CJK characters would score 0.67 instead of 0.0).
///
/// Two empty strings are considered identical.
fn levenshtein_similarity(a: &str, b: &str) -> f64 {
    let max_chars = a.chars().count().max(b.chars().count());
    if max_chars == 0 {
        return 1.0;
    }
    1.0 - levenshtein_distance(a, b) as f64 / max_chars as f64
}

/// Levenshtein edit distance between `a` and `b`, counted in `char`s
/// (insertions, deletions and substitutions each cost 1).
///
/// Uses the two-row dynamic-programming formulation, so memory is
/// proportional to the length of `b` only.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let a: Vec<char> = a.chars().collect();
    let b: Vec<char> = b.chars().collect();
    if a.is_empty() {
        return b.len();
    }
    if b.is_empty() {
        return a.len();
    }
    // `prev` is the DP row for the first `i` chars of `a`; `curr` is being
    // filled in for the first `i + 1` chars.
    let mut prev: Vec<usize> = (0..=b.len()).collect();
    let mut curr = vec![0; b.len() + 1];
    for (i, &ac) in a.iter().enumerate() {
        curr[0] = i + 1;
        for (j, &bc) in b.iter().enumerate() {
            let substitution = prev[j] + usize::from(ac != bc);
            curr[j + 1] = substitution.min(curr[j] + 1).min(prev[j + 1] + 1);
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[b.len()]
}