Skip to main content

ffmt/
case_norm.rs

1/// Normalize case of Fortran keywords in a source line.
2///
3/// Rules:
4/// - Fypp directive lines (`$:`, `@:`, `#:`, `#!`) are returned unchanged.
5/// - String literals (`'...'`, `"..."`), Fypp inline expressions (`${...}$`,
6///   `@{...}@`), and trailing comments (`! ...`) are opaque — never modified.
7/// - Fortran keywords and dot operators (`.AND.`, `.TRUE.`, etc.) are
8///   lowercased in non-opaque regions.
9/// - All other text (identifiers, numbers, punctuation) is preserved.
10pub fn normalize_case(line: &str) -> String {
11    let trimmed = line.trim_start();
12
13    // If the line is a Fypp directive, return unchanged.
14    if trimmed.starts_with("$:")
15        || trimmed.starts_with("@:")
16        || trimmed.starts_with("#:")
17        || trimmed.starts_with("#!")
18    {
19        return line.to_string();
20    }
21
22    let bytes = line.as_bytes();
23    let len = bytes.len();
24    let mut out = String::with_capacity(len);
25    let mut i = 0;
26
27    while i < len {
28        let ch = bytes[i];
29
30        // --- String literals: pass through unchanged ---
31        if ch == b'\'' || ch == b'"' {
32            let quote = ch;
33            let start = i;
34            i += 1;
35            while i < len {
36                if bytes[i] == quote {
37                    i += 1;
38                    // Doubled quote is an escape sequence, continue
39                    if i < len && bytes[i] == quote {
40                        i += 1;
41                        continue;
42                    }
43                    break;
44                }
45                i += 1;
46            }
47            out.push_str(&line[start..i]);
48            continue;
49        }
50
51        // --- Fypp inline expressions: ${...}$ ---
52        if ch == b'$' && i + 1 < len && bytes[i + 1] == b'{' {
53            let start = i;
54            i += 2;
55            let mut depth = 1usize;
56            while i < len && depth > 0 {
57                if bytes[i] == b'{' {
58                    depth += 1;
59                } else if bytes[i] == b'}' {
60                    depth -= 1;
61                    if depth == 0 {
62                        i += 1;
63                        // consume trailing $
64                        if i < len && bytes[i] == b'$' {
65                            i += 1;
66                        }
67                        break;
68                    }
69                }
70                i += 1;
71            }
72            out.push_str(&line[start..i]);
73            continue;
74        }
75
76        // --- Fypp inline expressions: @{...}@ ---
77        if ch == b'@' && i + 1 < len && bytes[i + 1] == b'{' {
78            let start = i;
79            i += 2;
80            let mut depth = 1usize;
81            while i < len && depth > 0 {
82                if bytes[i] == b'{' {
83                    depth += 1;
84                } else if bytes[i] == b'}' {
85                    depth -= 1;
86                    if depth == 0 {
87                        i += 1;
88                        // consume trailing @
89                        if i < len && bytes[i] == b'@' {
90                            i += 1;
91                        }
92                        break;
93                    }
94                }
95                i += 1;
96            }
97            out.push_str(&line[start..i]);
98            continue;
99        }
100
101        // --- Inline comment: everything from ! to end-of-line is opaque ---
102        if ch == b'!' {
103            out.push_str(&line[i..]);
104            break;
105        }
106
107        // --- Dot operators: .TRUE., .AND., etc. ---
108        if ch == b'.' && i + 2 < len {
109            if let Some((dot_end, _)) = crate::match_dot_token(bytes, i) {
110                // Lowercase the entire dot keyword/operator token
111                let op_str = &line[i..dot_end];
112                out.push_str(&op_str.to_ascii_lowercase());
113                i = dot_end;
114                continue;
115            }
116        }
117
118        // --- Word boundary: check for keyword ---
119        if ch.is_ascii_alphabetic() || ch == b'_' {
120            let start = i;
121            while i < len && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
122                i += 1;
123            }
124            let word = &line[start..i];
125            if is_keyword(word) {
126                out.push_str(&word.to_ascii_lowercase());
127            } else {
128                out.push_str(word);
129            }
130            continue;
131        }
132
133        // --- Everything else: pass through unchanged ---
134        out.push(ch as char);
135        i += 1;
136    }
137
138    out
139}
140
141/// Return true if `word` is a Fortran keyword (case-insensitive).
142fn is_keyword(word: &str) -> bool {
143    let lower = word.to_ascii_lowercase();
144    KEYWORDS.binary_search(&lower.as_str()).is_ok()
145}
146
/// Table of the Fortran keywords that get lowercased.
///
/// Every entry is lowercase, and the table is kept in strictly ascending
/// ASCII order because lookups use a binary search. Note that `_` sorts
/// before the lowercase letters, so `non_overridable` precedes `none`.
// Entries are grouped by initial letter for readability; keep rustfmt from
// exploding the table back to one entry per line.
#[rustfmt::skip]
const KEYWORDS: &[&str] = &[
    // a
    "abstract", "allocatable", "allocate", "associate", "asynchronous",
    // b
    "backspace", "bind", "block",
    // c
    "call", "case", "change", "character", "class", "close", "codimension",
    "common", "complex", "contains", "contiguous", "continue", "critical", "cycle",
    // d
    "data", "deallocate", "default", "deferred", "dimension", "do", "double",
    // e
    "elemental", "else", "elsewhere", "end", "endfile", "entry", "enum",
    "enumerator", "equivalence", "error", "event", "exit", "extends", "external",
    // f
    "fail", "final", "flush", "forall", "form", "format", "function",
    // g
    "generic", "go",
    // i
    "if", "image", "implicit", "import", "impure", "in", "inout", "inquire",
    "integer", "intent", "interface", "intrinsic",
    // l
    "lock", "logical",
    // m
    "module",
    // n
    "namelist", "non_overridable", "none", "nopass", "nullify",
    // o
    "only", "open", "optional", "out",
    // p
    "parameter", "pass", "pointer", "precision", "print", "private",
    "procedure", "program", "protected", "public", "pure",
    // r
    "rank", "read", "real", "recursive", "result", "return", "rewind",
    // s
    "save", "select", "sequence", "stop", "submodule", "subroutine", "sync",
    // t
    "target", "team", "then", "to", "type",
    // u
    "unlock", "use",
    // v
    "value", "volatile",
    // w
    "wait", "where", "while", "write",
];
265
#[cfg(test)]
mod tests {
    use super::*;

    /// Guard the binary-search precondition: each keyword must compare
    /// strictly less than its successor, which also rules out duplicates.
    #[test]
    fn test_keyword_list_sorted() {
        for (offset, pair) in KEYWORDS.windows(2).enumerate() {
            let (previous, current) = (pair[0], pair[1]);
            assert!(
                previous < current,
                "KEYWORDS not sorted at index {}: '{}' >= '{}'",
                // Report the index of the second element of the pair.
                offset + 1,
                previous,
                current
            );
        }
    }
}