Skip to main content

garbage_code_hunter/
utils.rs

//! Utility functions for text-based analysis.
2
/// Heuristically decide whether an already-trimmed source line is a comment.
///
/// A line counts as a comment when it starts with `//` (which also covers the
/// `///` and `//!` doc forms) or with `/*`. A leading `*` is treated as a
/// block-comment continuation only when it stands alone, is followed by
/// whitespace, or forms the closing `*/`; a `*` glued to anything else
/// (for example `*counter += 1;`) is a dereference and therefore code.
///
/// The check is purely line-based and keeps no state across lines, so interior
/// lines of a block comment that do not begin with `*` are not recognised, and
/// a wrapped multiplication operand such as `* factor` is still classified as
/// a comment.
///
/// `trimmed` is expected to have its leading whitespace removed by the caller.
fn is_comment_line(trimmed: &str) -> bool {
    if trimmed.starts_with("//") || trimmed.starts_with("/*") {
        return true;
    }
    match trimmed.strip_prefix('*') {
        // `*`, `* text` and `*/` are block-comment shapes; `*ident` is a deref.
        Some(rest) => {
            rest.is_empty() || rest.starts_with('/') || rest.starts_with(char::is_whitespace)
        }
        None => false,
    }
}
11
12/// Find the line number of a string literal in source content (skipping comments)
13pub fn find_line_of_str(content: &str, target: &str) -> usize {
14    for (i, line) in content.lines().enumerate() {
15        let trimmed = line.trim();
16        if is_comment_line(trimmed) {
17            continue;
18        }
19        if line.contains(target) {
20            return i + 1;
21        }
22    }
23    1
24}
25
26/// Find the line number of a string literal, skipping comments and import statements
27pub fn find_line_of_str_non_import(content: &str, target: &str) -> usize {
28    for (i, line) in content.lines().enumerate() {
29        let trimmed = line.trim();
30        if is_comment_line(trimmed) || trimmed.starts_with("use ") {
31            continue;
32        }
33        if line.contains(target) {
34            return i + 1;
35        }
36    }
37    1
38}
39
40/// Count non-comment occurrences of a pattern in source content
41pub fn count_non_comment_matches(content: &str, target: &str) -> usize {
42    let mut count = 0;
43    for line in content.lines() {
44        let trimmed = line.trim();
45        if is_comment_line(trimmed) {
46            continue;
47        }
48        count += line.matches(target).count();
49    }
50    count
51}
52
/// Translate a byte offset into a 1-based `(line, column)` pair.
///
/// Every character that starts before `byte_offset` advances the position:
/// a `'\n'` moves to the next line and resets the column to 1, any other
/// character moves one column to the right. Columns therefore count
/// characters, not bytes. An offset past the end of `content` yields the
/// position just after the last character.
pub fn get_position_from_content(content: &str, byte_offset: usize) -> (usize, usize) {
    content
        .char_indices()
        .take_while(|&(start, _)| start < byte_offset)
        .fold((1, 1), |(row, column), (_, symbol)| {
            if symbol == '\n' {
                (row + 1, 1)
            } else {
                (row, column + 1)
            }
        })
}
70
/// Shorten `s` so that it fits in `max` bytes, marking the cut with `"..."`.
///
/// Strings whose byte length is at most `max` are returned unchanged.
/// Otherwise the first `max - 3` bytes are kept (saturating at zero), the cut
/// is moved back to the nearest UTF-8 character boundary so slicing cannot
/// panic, and `"..."` is appended. When `max` is below 3 the result is just
/// `"..."`, which is longer than `max`.
pub fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }

    let budget = max.saturating_sub(3);
    // Offset 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=budget)
        .rev()
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(0);

    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(&s[..cut]);
    shortened.push_str("...");
    shortened
}
86
87/// Count non-comment, non-import occurrences of a pattern in source content
88pub fn count_non_import_matches(content: &str, target: &str) -> usize {
89    let mut count = 0;
90    for line in content.lines() {
91        let trimmed = line.trim();
92        if is_comment_line(trimmed) || trimmed.starts_with("use ") {
93            continue;
94        }
95        count += line.matches(target).count();
96    }
97    count
98}
99
#[cfg(test)]
mod tests {
    use super::*;

    // ── find_line_of_str ──────────────────────────────────────────

    /// A target located in ordinary code is reported with its 1-based line number.
    #[test]
    fn test_find_line_of_str_finds_target() {
        let source = "fn main() {\n    let x = 1;\n    println!(\"{}\", x);\n}";
        let line = find_line_of_str(source, "println");
        assert_eq!(line, 3, "println appears on line 3, got {line}");
    }

    /// A target that never occurs yields the fallback line number 1.
    #[test]
    fn test_find_line_of_str_returns_fallback_when_missing() {
        let found = find_line_of_str("fn main() {}", "nonexistent");
        assert_eq!(
            found, 1,
            "should return fallback line 1 when target is absent"
        );
    }

    /// An occurrence on a `//` line is ignored in favour of the later code line.
    #[test]
    fn test_find_line_of_str_skips_comments() {
        let source = "// println!(\"hidden\")\nfn main() {\n    println!(\"real\");\n}";
        assert_eq!(
            find_line_of_str(source, "println"),
            3,
            "should skip the comment on line 1 and find println on line 3"
        );
    }

    /// Empty input does not panic and yields the fallback.
    #[test]
    fn test_find_line_of_str_empty_content_does_not_crash() {
        let found = find_line_of_str("", "anything");
        assert_eq!(found, 1, "empty content should return fallback line 1");
    }

    // ── find_line_of_str_non_import ────────────────────────────────

    /// A match on a `use` line is passed over; the first code line wins.
    #[test]
    fn test_find_line_of_str_non_import_skips_use_lines() {
        let source = "use std::io;\nfn main() {\n    io::stdout();\n}";
        assert_eq!(
            find_line_of_str_non_import(source, "io"),
            3,
            "should skip 'use std::io;' and find io on line 3"
        );
    }

    /// Comment lines and `use` lines are both skipped in the same search.
    #[test]
    fn test_find_line_of_str_non_import_skips_comments_and_imports() {
        let source = "// comment\nuse std::fmt;\nfn run() {\n    fmt::format(\"hi\");\n}";
        assert_eq!(
            find_line_of_str_non_import(source, "format"),
            4,
            "should skip comment line 1 and use line 2"
        );
    }

    /// A target that occurs only on a `use` line counts as not found.
    #[test]
    fn test_find_line_of_str_non_import_target_only_in_use_not_found() {
        let source = "use std::fmt;\nfn run() {}";
        assert_eq!(
            find_line_of_str_non_import(source, "fmt"),
            1,
            "'fmt' only appears in use line, should fallback to 1"
        );
    }

    // ── count_non_comment_matches ──────────────────────────────────

    /// Occurrences on `//` lines do not contribute to the count.
    #[test]
    fn test_count_non_comment_matches_excludes_comments() {
        let source = "let x = 1;\n// let x = 2;\nlet y = 1;";
        let hits = count_non_comment_matches(source, "let");
        assert_eq!(
            hits, 2,
            "should count 'let' in lines 1 and 3 only, skipping the comment on line 2"
        );
    }

    /// A target absent from the code yields zero.
    #[test]
    fn test_count_non_comment_matches_returns_zero_for_absent_target() {
        let hits = count_non_comment_matches("fn main() {}", "println");
        assert_eq!(hits, 0, "no println in code, should be 0");
    }

    /// Occurrences on `///` doc-comment lines are excluded as well.
    #[test]
    fn test_count_non_comment_matches_excludes_doc_comments() {
        let source = "/// some code: let a = 1\nfn foo() { let b = 2; }";
        let hits = count_non_comment_matches(source, "let");
        assert_eq!(
            hits, 1,
            "doc comment 'let' should not be counted; only the code 'let' on line 2"
        );
    }

    /// A line opening with `/*` is excluded from the count.
    #[test]
    fn test_count_non_comment_matches_excludes_block_comment_start() {
        let source = "/* let hidden = 1; */\nfn foo() { let visible = 2; }";
        let hits = count_non_comment_matches(source, "let");
        assert_eq!(hits, 1, "block comment line should be excluded");
    }

    // ── get_position_from_content ──────────────────────────────────

    /// Offset 0 is the very first position.
    #[test]
    fn test_get_position_at_start() {
        let position = get_position_from_content("hello", 0);
        assert_eq!(position, (1, 1), "byte 0 should be line 1, col 1");
    }

    /// The column advances by one per character within a line.
    #[test]
    fn test_get_position_mid_line() {
        let position = get_position_from_content("abcde", 3);
        assert_eq!(position, (1, 4), "byte 3 (0-indexed) is the 4th character");
    }

    /// Passing a newline bumps the line and resets the column.
    #[test]
    fn test_get_position_at_newline_boundary() {
        let source = "first\nsecond\nthird";
        let offset = source.find("second").expect("second should exist");
        let position = get_position_from_content(source, offset);
        assert_eq!(position, (2, 1), "'second' starts at line 2, col 1");
    }

    /// An offset past the end does not panic and lands just after the last character.
    #[test]
    fn test_get_position_beyond_end_does_not_crash() {
        let position = get_position_from_content("hi", 999);
        assert_eq!(
            position,
            (1, 3),
            "beyond-end offset should land at end of content"
        );
    }

    /// Empty input does not panic and reports the starting position.
    #[test]
    fn test_get_position_empty_content_does_not_crash() {
        let position = get_position_from_content("", 0);
        assert_eq!(
            position,
            (1, 1),
            "empty content at offset 0 is line 1, col 1"
        );
    }

    // ── truncate ───────────────────────────────────────────────────

    /// Input shorter than the limit comes back untouched.
    #[test]
    fn test_truncate_short_string_unchanged() {
        assert_eq!(
            truncate("hello", 10),
            "hello",
            "string shorter than max should not be truncated"
        );
    }

    /// Input exactly at the limit comes back untouched.
    #[test]
    fn test_truncate_exact_length_kept() {
        assert_eq!(
            truncate("hello", 5),
            "hello",
            "string equal to max should not be truncated"
        );
    }

    /// Over-long input is cut and suffixed with "...".
    #[test]
    fn test_truncate_appends_ellipsis() {
        let shortened = truncate("hello world", 8);
        assert_eq!(
            shortened, "hello...",
            "should keep 5 chars + '...' = 8 total"
        );
    }

    /// A cut that would land inside a multi-byte character is moved back to a
    /// character boundary instead of panicking.
    #[test]
    fn test_truncate_multi_byte_char_boundary_no_panic() {
        // max=5 leaves a 2-byte budget, which falls in the middle of the 2-byte 'é'.
        let input = "héllo wörld";
        let result = truncate(input, 5);
        assert_eq!(
            result, "h...",
            "max=5 on 'héllo wörld' should fall back past 2-byte é and produce 'h...', got '{result}'"
        );
    }

    /// With max=0 only the ellipsis remains.
    #[test]
    fn test_truncate_max_zero_returns_ellipsis_only() {
        assert_eq!(truncate("hello", 0), "...", "max=0 should produce '...'");
    }

    /// With max below 3 the output is still just the ellipsis.
    #[test]
    fn test_truncate_max_one_produces_ellipsis() {
        assert_eq!(truncate("hello", 1), "...", "max=1 should produce '...'");
    }

    // ── count_non_import_matches ───────────────────────────────────

    /// Neither `use` lines nor comment lines contribute to the count.
    #[test]
    fn test_count_non_import_matches_excludes_use_and_comments() {
        let source = "use std::fmt;\n// use std::io;\nfn main() { fmt::println!(); }";
        let hits = count_non_import_matches(source, "fmt");
        assert_eq!(
            hits, 1,
            "only line 3 should match, lines 1 (use) and 2 (comment) are excluded"
        );
    }

    /// Plain code lines are all counted.
    #[test]
    fn test_count_non_import_matches_code_only() {
        let source = "let a = 1;\nlet b = 2;\nfn add(x: i32, y: i32) -> i32 { x + y }";
        let hits = count_non_import_matches(source, "let");
        assert_eq!(hits, 2, "both 'let' in code lines count, got wrong count");
    }

    /// A target absent from every eligible line yields zero.
    #[test]
    fn test_count_non_import_matches_zero_for_absent_target() {
        let hits = count_non_import_matches("fn main() { loop {} }", "println");
        assert_eq!(hits, 0, "no println in code => 0");
    }
}