split_modules/
util.rs

1//! Small pure helpers: identifier casing, keyword handling, and comment scanning.
2
3/// Convert a Rust identifier (CamelCase, SCREAMING_SNAKE, or mixed) into a
4/// snake_case module file stem.
5///
6/// Examples: `Foo` → `foo`, `HTTPServer` → `http_server`, `MAX_SIZE` → `max_size`,
7/// `IOError` → `io_error`, `parse_input` → `parse_input`.
8pub fn to_snake(ident: &str) -> String {
9    let chars: Vec<char> = ident.chars().collect();
10    let mut out = String::with_capacity(ident.len() + 4);
11    for (i, &c) in chars.iter().enumerate() {
12        if c == '_' {
13            // Collapse to a single underscore; never start with one.
14            if !out.ends_with('_') && !out.is_empty() {
15                out.push('_');
16            }
17            continue;
18        }
19        if c.is_ascii_uppercase() {
20            let prev = if i > 0 { Some(chars[i - 1]) } else { None };
21            let next = chars.get(i + 1).copied();
22            let boundary = match prev {
23                None => false,
24                Some('_') => false,
25                // lower/digit -> Upper : boundary  (parseInput | http2Server)
26                Some(p) if p.is_ascii_lowercase() || p.is_ascii_digit() => true,
27                // Upper -> Upper followed by lower : boundary (HTTPServer -> HTTP|Server)
28                Some(p) if p.is_ascii_uppercase() => {
29                    matches!(next, Some(n) if n.is_ascii_lowercase())
30                }
31                _ => false,
32            };
33            if boundary && !out.is_empty() && !out.ends_with('_') {
34                out.push('_');
35            }
36            out.push(c.to_ascii_lowercase());
37        } else {
38            out.push(c);
39        }
40    }
41    let trimmed = out.trim_matches('_').to_string();
42    let stem = if trimmed.is_empty() { "item".to_string() } else { trimmed };
43    sanitize_stem(&stem)
44}
45
/// Strict and reserved Rust keywords, none of which can serve as a bare module name.
///
/// Weak keywords (such as `union`) are deliberately absent: they remain valid
/// identifiers.
const KEYWORDS: &[&str] = &[
    // Strict keywords.
    "as", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern", "false", "fn",
    "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref",
    "return", "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe",
    "use", "where", "while",
    // Strict keywords introduced with the 2018 edition.
    "async", "await",
    // Reserved for future use.
    "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof",
    "unsized", "virtual", "yield",
    // Reserved by later editions.
    "try", "gen",
];
54
55/// Ensure a file stem is a usable module name (not a keyword, not empty).
56fn sanitize_stem(stem: &str) -> String {
57    if KEYWORDS.contains(&stem) {
58        format!("{stem}_")
59    } else {
60        stem.to_string()
61    }
62}
63
64/// Is `name` a Rust keyword (used to decide whether a `mod`/`use` needs adjustment)?
65pub fn is_keyword(name: &str) -> bool {
66    KEYWORDS.contains(&name)
67}
68
/// Byte offset of the start of the line containing `byte` in `src`.
///
/// Returns the index just past the last `\n` located before `byte`, or `0` when
/// `byte` lies on the first line.
///
/// The scan runs over raw bytes, so `byte` may fall inside a multi-byte UTF-8
/// character without panicking. `\n` is a single ASCII byte that never occurs
/// inside a multi-byte sequence, so the returned offset is always a valid char
/// boundary. An offset past the end of `src` is clamped to `src.len()`.
pub fn line_start(src: &str, byte: usize) -> usize {
    let end = byte.min(src.len());
    src.as_bytes()[..end]
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1)
}
73
74/// Given the gap between the previous item's end and this item's start, return the
75/// byte offset at which a contiguous block of plain `//` comments directly above the
76/// item begins. Doc-comments are already part of the item span, so they never appear
77/// here. Returns `item_start` when there is no attached comment block.
78///
79/// A comment block is "attached" only if it is immediately above the item with no
80/// intervening blank line.
81pub fn leading_comment_start(src: &str, gap_start: usize, item_start: usize) -> usize {
82    let ls = line_start(src, item_start);
83    if ls <= gap_start {
84        return item_start;
85    }
86    // Walk upward over whole lines strictly above the item's line.
87    let mut block_start = ls;
88    let mut cursor = ls;
89    loop {
90        if cursor <= gap_start {
91            break;
92        }
93        // `cursor` is the start of a line; find the start of the previous line.
94        let prev_line_end = cursor - 1; // the '\n' ending the previous line
95        let prev_line_start = src[gap_start..prev_line_end]
96            .rfind('\n')
97            .map(|i| gap_start + i + 1)
98            .unwrap_or(gap_start);
99        let line = src[prev_line_start..prev_line_end].trim();
100        let is_comment = line.starts_with("//") && !line.starts_with("///") && !line.starts_with("//!");
101        // `///`/`//!` shouldn't appear in the gap, but guard anyway.
102        if is_comment || (line.starts_with("//") && prev_line_start >= gap_start) {
103            block_start = prev_line_start;
104            cursor = prev_line_start;
105        } else {
106            break;
107        }
108    }
109    if block_start < ls {
110        block_start
111    } else {
112        item_start
113    }
114}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Casing conversions across CamelCase, acronyms, digits and snake input.
    #[test]
    fn snake_cases() {
        assert_eq!(to_snake("Foo"), "foo");
        assert_eq!(to_snake("FooBar"), "foo_bar");
        assert_eq!(to_snake("HTTPServer"), "http_server");
        assert_eq!(to_snake("IOError"), "io_error");
        assert_eq!(to_snake("MAX_SIZE"), "max_size");
        assert_eq!(to_snake("parse_input"), "parse_input");
        assert_eq!(to_snake("Http2Server"), "http2_server");
        assert_eq!(to_snake("A"), "a");
        assert_eq!(to_snake("VersionReq"), "version_req");
    }

    /// Underscore runs collapse, edges are trimmed, and empty results fall back.
    #[test]
    fn snake_case_underscore_edges() {
        assert_eq!(to_snake("__Foo__Bar__"), "foo_bar");
        assert_eq!(to_snake("_"), "item");
        assert_eq!(to_snake(""), "item");
    }

    #[test]
    fn keyword_stems_are_sanitized() {
        // `Match` → `match` is a keyword, must be escaped.
        assert_eq!(to_snake("Match"), "match_");
        assert_eq!(to_snake("Type"), "type_");
        assert_eq!(to_snake("Self"), "self_");
        assert!(is_keyword("Self"));
        assert!(!is_keyword("match_"));
    }

    #[test]
    fn line_start_finds_line_beginning() {
        assert_eq!(line_start("", 0), 0);
        assert_eq!(line_start("ab\ncd", 0), 0);
        // The newline byte itself still belongs to the first line.
        assert_eq!(line_start("ab\ncd", 2), 0);
        assert_eq!(line_start("ab\ncd", 3), 3);
        assert_eq!(line_start("ab\ncd", 5), 3);
    }

    #[test]
    fn leading_comment_block_is_attached() {
        let src = "fn a() {}\n// one\n// two\nfn b() {}\n";
        let gap = src.find("{}").unwrap() + 2;
        let item = src.find("fn b").unwrap();
        assert_eq!(
            leading_comment_start(src, gap, item),
            src.find("// one").unwrap()
        );
    }

    #[test]
    fn leading_comment_includes_indentation() {
        let src = "mod m {\n    // note\n    fn x() {}\n}\n";
        let gap = src.find('{').unwrap() + 1;
        let item = src.find("fn x").unwrap();
        // The block starts at the beginning of the comment's line.
        assert_eq!(
            leading_comment_start(src, gap, item),
            src.find("    // note").unwrap()
        );
    }

    #[test]
    fn leading_comment_requires_adjacency() {
        // A blank line detaches the comment from the item.
        let src = "fn a() {}\n// note\n\nfn b() {}\n";
        let gap = src.find("{}").unwrap() + 2;
        let item = src.find("fn b").unwrap();
        assert_eq!(leading_comment_start(src, gap, item), item);

        // No comment at all.
        let src = "fn a() {}\nfn b() {}\n";
        let gap = src.find("{}").unwrap() + 2;
        let item = src.find("fn b").unwrap();
        assert_eq!(leading_comment_start(src, gap, item), item);

        // Item on the same line as the previous item's end.
        let src = "fn a() {} fn b() {}";
        let gap = src.find("{}").unwrap() + 2;
        let item = src.find("fn b").unwrap();
        assert_eq!(leading_comment_start(src, gap, item), item);
    }

    #[test]
    fn trailing_comment_is_included() {
        let src = "let x = 1; // hi\nnext";
        let end = src.find(';').unwrap() + 1;
        assert_eq!(extend_trailing_comment(src, end), src.find('\n').unwrap());
        // Comment running to end of input without a newline.
        assert_eq!(extend_trailing_comment("a; // c", 2), 7);
        // Trailing code is not a comment: offset is unchanged.
        assert_eq!(extend_trailing_comment("a; b;\n", 2), 2);
    }
}
141
/// If the remainder of the line after `end` is only a trailing `//` line comment,
/// extend `end` to include it (but not the line terminator). Otherwise return `end`.
///
/// Only whitespace may separate `end` from the `//`. A trailing `///` after an
/// item is unusual and is treated as ordinary trailing text, so it is included
/// as well. The line terminator is never part of the result: for `\r\n` line
/// endings the `\r` is excluded along with the `\n`.
///
/// If `end` is past the end of `src` or not on a char boundary, there is nothing
/// to inspect and `end` is returned unchanged.
pub fn extend_trailing_comment(src: &str, end: usize) -> usize {
    let tail = match src.get(end..) {
        Some(tail) => tail,
        None => return end,
    };
    // Rest of the current line, without its terminator.
    let line = tail.find('\n').map_or(tail, |newline| &tail[..newline]);
    let line = line.strip_suffix('\r').unwrap_or(line);
    if line.trim_start().starts_with("//") {
        end + line.len()
    } else {
        end
    }
}
split_modules/util.rs

split_modules/
util.rs