Skip to main content

i18n_audit_rust/
scanner.rs

1use crate::report::{DynamicKeyEntry, LocationEntry};
2use crate::support::PathNormalizer;
3use std::collections::{BTreeMap, BTreeSet};
4use std::fs;
5use walkdir::WalkDir;
6
7#[derive(Debug, Clone, Default)]
8pub struct ScanResult {
9    used_keys: BTreeSet<String>,
10    used_key_locations: BTreeMap<String, Vec<LocationEntry>>,
11    dynamic_keys: Vec<DynamicKeyEntry>,
12    dynamic_keys_index: BTreeSet<String>,
13}
14
15impl ScanResult {
16    pub fn add_used_key(
17        &mut self,
18        key: &str,
19        file: Option<&str>,
20        line: usize,
21        column: usize,
22        char_pos: usize,
23        source: &str,
24    ) {
25        let trimmed = key.trim();
26        if trimmed.is_empty() {
27            return;
28        }
29
30        self.used_keys.insert(trimmed.to_string());
31
32        let Some(file_path) = file else {
33            return;
34        };
35
36        let normalized_file = PathNormalizer::normalize(file_path);
37        let entry = LocationEntry {
38            file: normalized_file.clone(),
39            line: line.max(1),
40            column: column.max(1),
41            char_pos: char_pos.max(1),
42            source: source.to_string(),
43        };
44
45        let signature = format!(
46            "{}|{}|{}|{}|{}|{}",
47            trimmed.to_lowercase(),
48            entry.line,
49            entry.column,
50            entry.char_pos,
51            normalized_file.to_lowercase(),
52            entry.source.to_lowercase()
53        );
54
55        let entries = self
56            .used_key_locations
57            .entry(trimmed.to_string())
58            .or_default();
59
60        let already_exists = entries.iter().any(|existing| {
61            format!(
62                "{}|{}|{}|{}|{}|{}",
63                trimmed.to_lowercase(),
64                existing.line,
65                existing.column,
66                existing.char_pos,
67                existing.file.to_lowercase(),
68                existing.source.to_lowercase()
69            ) == signature
70        });
71
72        if !already_exists {
73            entries.push(entry);
74        }
75    }
76
77    pub fn add_dynamic_key(&mut self, file: &str, line: usize, expression: &str, source: &str) {
78        let entry = DynamicKeyEntry {
79            file: PathNormalizer::normalize(file),
80            line: line.max(1),
81            expression: expression.to_string(),
82            source: source.to_string(),
83        };
84
85        let signature = format!(
86            "{}|{}|{}|{}",
87            entry.file.to_lowercase(),
88            entry.line,
89            entry.source.to_lowercase(),
90            entry.expression.to_lowercase()
91        );
92
93        if self.dynamic_keys_index.insert(signature) {
94            self.dynamic_keys.push(entry);
95        }
96    }
97
98    pub fn merge(&mut self, other: ScanResult) {
99        for key in other.get_used_keys() {
100            self.used_keys.insert(key);
101        }
102
103        for (key, locations) in other.get_used_key_locations() {
104            for location in locations {
105                self.add_used_key(
106                    &key,
107                    Some(&location.file),
108                    location.line,
109                    location.column,
110                    location.char_pos,
111                    &location.source,
112                );
113            }
114        }
115
116        for dynamic in other.get_dynamic_keys() {
117            self.add_dynamic_key(&dynamic.file, dynamic.line, &dynamic.expression, &dynamic.source);
118        }
119    }
120
121    pub fn get_used_keys(&self) -> Vec<String> {
122        self.used_keys.iter().cloned().collect()
123    }
124
125    pub fn get_used_key_locations(&self) -> BTreeMap<String, Vec<LocationEntry>> {
126        let mut output = self.used_key_locations.clone();
127        for locations in output.values_mut() {
128            locations.sort_by_key(|entry| {
129                (
130                    entry.file.clone(),
131                    entry.line,
132                    entry.column,
133                    entry.char_pos,
134                    entry.source.clone(),
135                )
136            });
137        }
138        output
139    }
140
141    pub fn get_dynamic_keys(&self) -> Vec<DynamicKeyEntry> {
142        let mut output = self.dynamic_keys.clone();
143        output.sort_by_key(|entry| (entry.file.clone(), entry.line));
144        output
145    }
146}
147
/// Scanner configured with the names of the functions and macros that are
/// treated as translation calls.
#[derive(Debug, Clone)]
pub struct RustSourceScanner {
    /// Callee names (compared against the last path segment of a call).
    translation_calls: Vec<String>,
}

impl Default for RustSourceScanner {
    /// Builds a scanner that recognises `t`, `tr`, `gettext`, `dgettext`,
    /// `ngettext` and `fl`.
    fn default() -> Self {
        const DEFAULT_CALLS: [&str; 6] = ["t", "tr", "gettext", "dgettext", "ngettext", "fl"];

        let translation_calls = DEFAULT_CALLS
            .iter()
            .map(|name| String::from(*name))
            .collect();

        Self { translation_calls }
    }
}
167
168impl RustSourceScanner {
169    pub fn scan(&self, paths: &[String], exclude_paths: &[String], follow_symlinks: bool) -> ScanResult {
170        let mut result = ScanResult::default();
171        let normalized_excludes: Vec<String> = exclude_paths
172            .iter()
173            .map(|path| PathNormalizer::normalize(path))
174            .collect();
175
176        for path in paths {
177            let walker = WalkDir::new(path)
178                .follow_links(follow_symlinks)
179                .into_iter()
180                .filter_map(Result::ok);
181
182            for entry in walker {
183                if !entry.file_type().is_file() {
184                    continue;
185                }
186
187                let file_path = PathNormalizer::normalize(&entry.path().to_string_lossy());
188                if self.should_exclude(&file_path, &normalized_excludes) {
189                    continue;
190                }
191
192                if !file_path.ends_with(".rs") {
193                    continue;
194                }
195
196                self.scan_file(&file_path, &mut result);
197            }
198        }
199
200        result
201    }
202
203    fn scan_file(&self, file_path: &str, result: &mut ScanResult) {
204        let Ok(content) = fs::read_to_string(file_path) else {
205            return;
206        };
207
208        if content.trim().is_empty() {
209            return;
210        }
211
212        let bytes = content.as_bytes();
213        let mut idx = 0usize;
214        while idx < bytes.len() {
215            if !is_identifier_start(bytes[idx]) {
216                idx += 1;
217                continue;
218            }
219
220            let start = idx;
221            idx += 1;
222            while idx < bytes.len() && is_identifier_continue(bytes[idx]) {
223                idx += 1;
224            }
225
226            let mut end = idx;
227            while idx + 1 < bytes.len() && bytes[idx] == b':' && bytes[idx + 1] == b':' {
228                idx += 2;
229                if idx >= bytes.len() || !is_identifier_start(bytes[idx]) {
230                    break;
231                }
232                idx += 1;
233                while idx < bytes.len() && is_identifier_continue(bytes[idx]) {
234                    idx += 1;
235                }
236                end = idx;
237            }
238
239            let callee = &content[start..end];
240            let symbol = callee.rsplit("::").next().unwrap_or(callee);
241            if !self
242                .translation_calls
243                .iter()
244                .any(|name| name.eq_ignore_ascii_case(symbol))
245            {
246                continue;
247            }
248
249            let mut cursor = idx;
250            while cursor < bytes.len() && bytes[cursor].is_ascii_whitespace() {
251                cursor += 1;
252            }
253            let mut is_macro = false;
254            if cursor < bytes.len() && bytes[cursor] == b'!' {
255                is_macro = true;
256                cursor += 1;
257                while cursor < bytes.len() && bytes[cursor].is_ascii_whitespace() {
258                    cursor += 1;
259                }
260            }
261
262            if cursor >= bytes.len() || bytes[cursor] != b'(' {
263                continue;
264            }
265
266            let source = if is_macro {
267                format!("{}!()", symbol)
268            } else {
269                format!("{}()", symbol)
270            };
271
272            let open_paren_idx = cursor;
273            let first_arg_start = skip_ws(bytes, open_paren_idx + 1);
274            if first_arg_start >= bytes.len() {
275                continue;
276            }
277
278            if let Some((key, consumed)) = parse_string_literal(&content, first_arg_start) {
279                let (line, column) = line_col_from_offset(&content, first_arg_start);
280                result.add_used_key(
281                    &key,
282                    Some(file_path),
283                    line,
284                    column,
285                    first_arg_start + 1,
286                    &source,
287                );
288                idx = consumed;
289                continue;
290            }
291
292            if let Some((expression, consumed)) = parse_dynamic_expression(&content, first_arg_start) {
293                if !expression.trim().is_empty() {
294                    let (line, _) = line_col_from_offset(&content, first_arg_start);
295                    result.add_dynamic_key(file_path, line, expression.trim(), &source);
296                }
297                idx = consumed;
298            }
299        }
300    }
301
302    fn should_exclude(&self, file_path: &str, excludes: &[String]) -> bool {
303        let normalized_file = PathNormalizer::normalize(file_path).to_lowercase();
304        excludes
305            .iter()
306            .map(|entry| PathNormalizer::normalize(entry).to_lowercase())
307            .any(|exclude| !exclude.is_empty() && normalized_file.contains(&exclude))
308    }
309}
310
311fn parse_dynamic_expression(content: &str, start: usize) -> Option<(String, usize)> {
312    let bytes = content.as_bytes();
313    let mut i = start;
314    let mut paren_depth = 0usize;
315    let mut bracket_depth = 0usize;
316    let mut brace_depth = 0usize;
317
318    while i < bytes.len() {
319        match bytes[i] {
320            b'"' => {
321                let (_, consumed) = parse_string_literal(content, i)?;
322                i = consumed;
323                continue;
324            }
325            b'r' if i + 1 < bytes.len() && (bytes[i + 1] == b'"' || bytes[i + 1] == b'#') => {
326                if let Some((_, consumed)) = parse_string_literal(content, i) {
327                    i = consumed;
328                    continue;
329                }
330            }
331            b'(' => paren_depth += 1,
332            b')' => {
333                if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 {
334                    return Some((content[start..i].to_string(), i + 1));
335                }
336                paren_depth = paren_depth.saturating_sub(1);
337            }
338            b',' if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 => {
339                return Some((content[start..i].to_string(), i + 1));
340            }
341            b'[' => bracket_depth += 1,
342            b']' => bracket_depth = bracket_depth.saturating_sub(1),
343            b'{' => brace_depth += 1,
344            b'}' => brace_depth = brace_depth.saturating_sub(1),
345            _ => {}
346        }
347        i += 1;
348    }
349
350    Some((content[start..].to_string(), bytes.len()))
351}
352
/// Parses the Rust string literal that begins at byte offset `start`.
///
/// Two forms are recognised:
/// * `"..."` - the value is returned with Rust escape sequences decoded;
/// * `r"..."` / `r#"..."#` (any number of `#`) - the value is returned verbatim.
///
/// Returns the decoded value and the byte offset just past the closing
/// delimiter, or `None` when `start` is out of range, no literal starts there,
/// or the literal is not terminated.
fn parse_string_literal(content: &str, start: usize) -> Option<(String, usize)> {
    let bytes = content.as_bytes();

    match *bytes.get(start)? {
        b'"' => {
            let mut i = start + 1;
            while i < bytes.len() {
                match bytes[i] {
                    // Step over the escaped byte so `\"` does not end the literal.
                    b'\\' => i += 2,
                    b'"' => {
                        let value = unescape_rust_string(&content[start + 1..i]);
                        return Some((value, i + 1));
                    }
                    _ => i += 1,
                }
            }
            None
        }
        b'r' => {
            let hashes = bytes[start + 1..]
                .iter()
                .take_while(|byte| **byte == b'#')
                .count();
            let opening_quote = start + 1 + hashes;
            if bytes.get(opening_quote) != Some(&b'"') {
                return None;
            }

            // A raw string ends at the first `"` followed by the same number of `#`.
            let body_start = opening_quote + 1;
            let terminator = format!("\"{}", "#".repeat(hashes));
            let body_len = content[body_start..].find(terminator.as_str())?;
            let body_end = body_start + body_len;
            Some((
                content[body_start..body_end].to_string(),
                body_end + terminator.len(),
            ))
        }
        _ => None,
    }
}

/// Decodes the escape sequences of a Rust (non-raw) string literal body.
///
/// Handles `\n`, `\r`, `\t`, `\0`, `\\`, `\"`, `\'`, `\xHH` (ASCII range),
/// `\u{H...}` and the backslash-newline line continuation. Anything that is
/// not a well-formed escape is kept verbatim.
fn unescape_rust_string(body: &str) -> String {
    let mut out = String::with_capacity(body.len());
    let mut rest = body;

    while let Some(slash) = rest.find('\\') {
        out.push_str(&rest[..slash]);
        let tail = &rest[slash + 1..];
        let Some(marker) = tail.chars().next() else {
            // Lone trailing backslash: nothing to decode.
            out.push('\\');
            rest = tail;
            break;
        };
        let after = &tail[marker.len_utf8()..];

        rest = match marker {
            'n' => {
                out.push('\n');
                after
            }
            'r' => {
                out.push('\r');
                after
            }
            't' => {
                out.push('\t');
                after
            }
            '0' => {
                out.push('\0');
                after
            }
            '\\' | '"' | '\'' => {
                out.push(marker);
                after
            }
            // Line continuation: the newline and the indentation that follows
            // it are not part of the value.
            '\n' => after.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r')),
            '\r' if after.starts_with('\n') => {
                after.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'))
            }
            'x' => {
                let decoded = after
                    .get(..2)
                    .filter(|hex| hex.bytes().all(|b| b.is_ascii_hexdigit()))
                    .and_then(|hex| u8::from_str_radix(hex, 16).ok())
                    .filter(u8::is_ascii);
                match decoded {
                    Some(byte) => {
                        out.push(char::from(byte));
                        &after[2..]
                    }
                    None => {
                        out.push_str("\\x");
                        after
                    }
                }
            }
            'u' => {
                let decoded = after.strip_prefix('{').and_then(|inner| {
                    let close = inner.find('}')?;
                    // Underscores are permitted as digit separators.
                    let digits: String = inner[..close].chars().filter(|c| *c != '_').collect();
                    let well_formed = !digits.is_empty()
                        && digits.len() <= 6
                        && digits.bytes().all(|b| b.is_ascii_hexdigit());
                    if !well_formed {
                        return None;
                    }
                    let scalar = u32::from_str_radix(&digits, 16)
                        .ok()
                        .and_then(char::from_u32)?;
                    Some((scalar, &inner[close + 1..]))
                });
                match decoded {
                    Some((scalar, remaining)) => {
                        out.push(scalar);
                        remaining
                    }
                    None => {
                        out.push_str("\\u");
                        after
                    }
                }
            }
            _ => {
                out.push('\\');
                out.push(marker);
                after
            }
        };
    }

    out.push_str(rest);
    out
}
416
/// Converts a byte offset into a 1-based `(line, column)` pair.
///
/// `offset` is clamped to `content.len()`. Lines are separated by `\n`; the
/// column counts characters (not bytes) since the start of the line.
fn line_col_from_offset(content: &str, offset: usize) -> (usize, usize) {
    let head = &content[..content.len().min(offset)];

    // Byte index where the line containing `offset` begins.
    let line_start = match head.rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    };

    let line_number = head.matches('\n').count() + 1;
    let column_number = head[line_start..].chars().count() + 1;
    (line_number, column_number)
}
425
/// Returns the index of the first byte at or after `pos` that is not ASCII
/// whitespace, or `bytes.len()` when only whitespace remains. A `pos` beyond
/// the end of `bytes` is returned unchanged.
fn skip_ws(bytes: &[u8], pos: usize) -> usize {
    let Some(tail) = bytes.get(pos..) else {
        return pos;
    };

    let blanks = tail
        .iter()
        .take_while(|byte| byte.is_ascii_whitespace())
        .count();
    pos + blanks
}
432
/// Returns true when `byte` may begin an identifier: an ASCII letter or `_`.
fn is_identifier_start(byte: u8) -> bool {
    matches!(byte, b'_' | b'a'..=b'z' | b'A'..=b'Z')
}
436
/// Returns true when `byte` may appear after the first byte of an identifier:
/// an ASCII letter, an ASCII digit or `_`.
fn is_identifier_continue(byte: u8) -> bool {
    matches!(byte, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}