// i18n-audit 0.1.0

use crate::report::{DynamicKeyEntry, LocationEntry};
use crate::support::PathNormalizer;
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use walkdir::WalkDir;

/// Accumulated findings of a source scan: the translation keys that were
/// recorded, the locations they were recorded at, and the call sites that were
/// recorded as dynamic (non-literal) key expressions.
#[derive(Debug, Clone, Default)]
pub struct ScanResult {
    /// Every non-empty, trimmed key passed to `add_used_key`.
    used_keys: BTreeSet<String>,
    /// Locations recorded per key; `add_used_key` skips a location it already holds.
    used_key_locations: BTreeMap<String, Vec<LocationEntry>>,
    /// Entries recorded through `add_dynamic_key`, kept in insertion order.
    dynamic_keys: Vec<DynamicKeyEntry>,
    /// Lowercased signatures of the entries in `dynamic_keys`, used to reject duplicates.
    dynamic_keys_index: BTreeSet<String>,
}

impl ScanResult {
    pub fn add_used_key(
        &mut self,
        key: &str,
        file: Option<&str>,
        line: usize,
        column: usize,
        char_pos: usize,
        source: &str,
    ) {
        let trimmed = key.trim();
        if trimmed.is_empty() {
            return;
        }

        self.used_keys.insert(trimmed.to_string());

        let Some(file_path) = file else {
            return;
        };

        let normalized_file = PathNormalizer::normalize(file_path);
        let entry = LocationEntry {
            file: normalized_file.clone(),
            line: line.max(1),
            column: column.max(1),
            char_pos: char_pos.max(1),
            source: source.to_string(),
        };

        let signature = format!(
            "{}|{}|{}|{}|{}|{}",
            trimmed.to_lowercase(),
            entry.line,
            entry.column,
            entry.char_pos,
            normalized_file.to_lowercase(),
            entry.source.to_lowercase()
        );

        let entries = self
            .used_key_locations
            .entry(trimmed.to_string())
            .or_default();

        let already_exists = entries.iter().any(|existing| {
            format!(
                "{}|{}|{}|{}|{}|{}",
                trimmed.to_lowercase(),
                existing.line,
                existing.column,
                existing.char_pos,
                existing.file.to_lowercase(),
                existing.source.to_lowercase()
            ) == signature
        });

        if !already_exists {
            entries.push(entry);
        }
    }

    pub fn add_dynamic_key(&mut self, file: &str, line: usize, expression: &str, source: &str) {
        let entry = DynamicKeyEntry {
            file: PathNormalizer::normalize(file),
            line: line.max(1),
            expression: expression.to_string(),
            source: source.to_string(),
        };

        let signature = format!(
            "{}|{}|{}|{}",
            entry.file.to_lowercase(),
            entry.line,
            entry.source.to_lowercase(),
            entry.expression.to_lowercase()
        );

        if self.dynamic_keys_index.insert(signature) {
            self.dynamic_keys.push(entry);
        }
    }

    pub fn merge(&mut self, other: ScanResult) {
        for key in other.get_used_keys() {
            self.used_keys.insert(key);
        }

        for (key, locations) in other.get_used_key_locations() {
            for location in locations {
                self.add_used_key(
                    &key,
                    Some(&location.file),
                    location.line,
                    location.column,
                    location.char_pos,
                    &location.source,
                );
            }
        }

        for dynamic in other.get_dynamic_keys() {
            self.add_dynamic_key(&dynamic.file, dynamic.line, &dynamic.expression, &dynamic.source);
        }
    }

    pub fn get_used_keys(&self) -> Vec<String> {
        self.used_keys.iter().cloned().collect()
    }

    pub fn get_used_key_locations(&self) -> BTreeMap<String, Vec<LocationEntry>> {
        let mut output = self.used_key_locations.clone();
        for locations in output.values_mut() {
            locations.sort_by_key(|entry| {
                (
                    entry.file.clone(),
                    entry.line,
                    entry.column,
                    entry.char_pos,
                    entry.source.clone(),
                )
            });
        }
        output
    }

    pub fn get_dynamic_keys(&self) -> Vec<DynamicKeyEntry> {
        let mut output = self.dynamic_keys.clone();
        output.sort_by_key(|entry| (entry.file.clone(), entry.line));
        output
    }
}

/// Scanner that searches Rust source text for calls to translation functions
/// and macros.
#[derive(Debug, Clone)]
pub struct RustSourceScanner {
    /// Callee names that are treated as translation calls.
    translation_calls: Vec<String>,
}

impl Default for RustSourceScanner {
    /// Creates a scanner that recognises the built-in set of callee names.
    fn default() -> Self {
        let translation_calls = ["t", "tr", "gettext", "dgettext", "ngettext", "fl"]
            .iter()
            .copied()
            .map(String::from)
            .collect();

        Self { translation_calls }
    }
}

impl RustSourceScanner {
    /// Walks every entry of `paths` and scans each regular file whose
    /// normalized path ends in `.rs` and does not match `exclude_paths`.
    ///
    /// Directory entries that cannot be read are skipped. `follow_symlinks` is
    /// forwarded to the directory walker.
    pub fn scan(&self, paths: &[String], exclude_paths: &[String], follow_symlinks: bool) -> ScanResult {
        let excludes: Vec<String> = exclude_paths
            .iter()
            .map(|path| PathNormalizer::normalize(path))
            .collect();
        let mut findings = ScanResult::default();

        for root in paths {
            let files = WalkDir::new(root)
                .follow_links(follow_symlinks)
                .into_iter()
                .filter_map(Result::ok)
                .filter(|entry| entry.file_type().is_file());

            for entry in files {
                let candidate = PathNormalizer::normalize(&entry.path().to_string_lossy());
                let skipped = self.should_exclude(&candidate, &excludes) || !candidate.ends_with(".rs");
                if !skipped {
                    self.scan_file(&candidate, &mut findings);
                }
            }
        }

        findings
    }

    /// Scans one file and records every translation call found in it.
    ///
    /// A file that cannot be read as UTF-8 text, or that contains only
    /// whitespace, is skipped. The scan is purely lexical: it walks the bytes
    /// looking for an identifier (optionally a `::` path) whose last segment
    /// matches one of `translation_calls` ignoring ASCII case, followed by an
    /// optional `!` and an opening parenthesis. Comments and unrelated string
    /// literals are not skipped, so calls written inside them are matched too.
    ///
    /// NOTE(review): the first argument is recorded as the key for every
    /// callee, including `dgettext`, whose first argument is conventionally the
    /// text domain — confirm this is intended.
    fn scan_file(&self, file_path: &str, result: &mut ScanResult) {
        let content = match fs::read_to_string(file_path) {
            Ok(text) => text,
            Err(_) => return,
        };
        if content.trim().is_empty() {
            return;
        }

        let bytes = content.as_bytes();
        let total = bytes.len();

        // Returns the index just past the identifier characters starting at `at`.
        let ident_end = |mut at: usize| -> usize {
            while at < total && is_identifier_continue(bytes[at]) {
                at += 1;
            }
            at
        };

        let mut idx = 0usize;
        while idx < total {
            if !is_identifier_start(bytes[idx]) {
                idx += 1;
                continue;
            }

            // First path segment.
            let callee_start = idx;
            idx = ident_end(idx + 1);
            let mut callee_end = idx;

            // Further `::segment` parts. A `::` that is not followed by an
            // identifier is consumed but does not extend the callee.
            while bytes[idx..].starts_with(b"::") {
                idx += 2;
                match bytes.get(idx) {
                    Some(&next) if is_identifier_start(next) => {}
                    _ => break,
                }
                idx = ident_end(idx + 1);
                callee_end = idx;
            }

            let callee = &content[callee_start..callee_end];
            let symbol = callee.rsplit_once("::").map_or(callee, |(_, last)| last);
            let is_translation_call = self
                .translation_calls
                .iter()
                .any(|name| name.eq_ignore_ascii_case(symbol));
            if !is_translation_call {
                continue;
            }

            // Optional whitespace, optional `!`, optional whitespace, then `(`.
            let mut cursor = skip_ws(bytes, idx);
            let is_macro = bytes.get(cursor) == Some(&b'!');
            if is_macro {
                cursor = skip_ws(bytes, cursor + 1);
            }
            if bytes.get(cursor) != Some(&b'(') {
                continue;
            }

            let source = format!("{}{}", symbol, if is_macro { "!()" } else { "()" });

            let first_arg_start = skip_ws(bytes, cursor + 1);
            if first_arg_start >= total {
                continue;
            }

            // A literal first argument is a used key; anything else is
            // recorded as a dynamic expression.
            if let Some((key, consumed)) = parse_string_literal(&content, first_arg_start) {
                let (line, column) = line_col_from_offset(&content, first_arg_start);
                result.add_used_key(
                    &key,
                    Some(file_path),
                    line,
                    column,
                    first_arg_start + 1,
                    &source,
                );
                idx = consumed;
                continue;
            }

            if let Some((expression, consumed)) = parse_dynamic_expression(&content, first_arg_start) {
                let expression = expression.trim();
                if !expression.is_empty() {
                    let (line, _) = line_col_from_offset(&content, first_arg_start);
                    result.add_dynamic_key(file_path, line, expression, &source);
                }
                idx = consumed;
            }
        }
    }

    /// Returns `true` when the lowercased, normalized `file_path` contains any
    /// non-empty lowercased, normalized entry of `excludes` as a substring.
    fn should_exclude(&self, file_path: &str, excludes: &[String]) -> bool {
        let haystack = PathNormalizer::normalize(file_path).to_lowercase();
        for entry in excludes {
            let needle = PathNormalizer::normalize(entry).to_lowercase();
            if !needle.is_empty() && haystack.contains(needle.as_str()) {
                return true;
            }
        }
        false
    }
}

/// Extracts the text of a non-literal first argument that begins at byte
/// offset `start`.
///
/// The expression ends at the first `,` or `)` found while no parenthesis,
/// bracket or brace opened inside the expression is still open; string
/// literals encountered on the way are skipped as a whole. Returns the
/// expression text and the offset just past the terminator. When the input
/// ends first, the remainder of `content` is returned. Returns `None` when an
/// ordinary string literal inside the expression is unterminated.
fn parse_dynamic_expression(content: &str, start: usize) -> Option<(String, usize)> {
    let bytes = content.as_bytes();
    let (mut parens, mut brackets, mut braces) = (0usize, 0usize, 0usize);
    let mut pos = start;

    while let Some(&byte) = bytes.get(pos) {
        if byte == b'"' {
            let (_, after) = parse_string_literal(content, pos)?;
            pos = after;
            continue;
        }

        let top_level = parens == 0 && brackets == 0 && braces == 0;
        let maybe_raw_literal =
            byte == b'r' && matches!(bytes.get(pos + 1).copied(), Some(b'"') | Some(b'#'));

        if maybe_raw_literal {
            // Not every `r"`/`r#` starts a raw literal; on failure the `r` is
            // treated like any other byte.
            if let Some((_, after)) = parse_string_literal(content, pos) {
                pos = after;
                continue;
            }
        } else if top_level && (byte == b')' || byte == b',') {
            return Some((content[start..pos].to_string(), pos + 1));
        } else {
            match byte {
                b'(' => parens += 1,
                b')' => parens = parens.saturating_sub(1),
                b'[' => brackets += 1,
                b']' => brackets = brackets.saturating_sub(1),
                b'{' => braces += 1,
                b'}' => braces = braces.saturating_sub(1),
                _ => {}
            }
        }

        pos += 1;
    }

    Some((content[start..].to_string(), bytes.len()))
}

/// Parses the Rust string literal that starts at byte offset `start`.
///
/// Two forms are recognised: ordinary literals (`"..."`) and raw literals
/// (`r"..."`, `r#"..."#`, with any number of `#`). Returns the literal's text
/// and the byte offset just past its closing delimiter, or `None` when
/// `start` is out of range, no literal starts there, or the literal is not
/// terminated.
///
/// Escapes in ordinary literals are decoded with Rust's rules (not JSON's):
/// `\n`, `\r`, `\t`, `\0`, `\\`, `\'`, `\"`, `\xNN` (up to `7F`), `\u{...}`
/// and the backslash-newline continuation. An escape that is not recognised
/// is kept verbatim, backslash included.
fn parse_string_literal(content: &str, start: usize) -> Option<(String, usize)> {
    let bytes = content.as_bytes();

    match *bytes.get(start)? {
        b'"' => {
            let body_start = start + 1;
            let mut i = body_start;
            while i < bytes.len() {
                match bytes[i] {
                    // Skip the backslash and the byte it escapes so that an
                    // escaped quote does not end the literal.
                    b'\\' => i += 2,
                    b'"' => {
                        let text = unescape_rust_str(&content[body_start..i]);
                        return Some((text, i + 1));
                    }
                    _ => i += 1,
                }
            }
            None
        }
        b'r' => {
            let hashes = bytes[start + 1..]
                .iter()
                .take_while(|byte| **byte == b'#')
                .count();
            let quote_at = start + 1 + hashes;
            if bytes.get(quote_at) != Some(&b'"') {
                return None;
            }

            // A raw literal ends at the first quote followed by the same
            // number of `#` that opened it; its body is taken verbatim.
            let body_start = quote_at + 1;
            let terminator = format!("\"{}", "#".repeat(hashes));
            let body_len = content[body_start..].find(terminator.as_str())?;
            let end = body_start + body_len;
            Some((content[body_start..end].to_string(), end + terminator.len()))
        }
        _ => None,
    }
}

/// Decodes the escape sequences in the body of an ordinary string literal
/// (the text between the quotes).
fn unescape_rust_str(body: &str) -> String {
    let mut out = String::with_capacity(body.len());
    let mut rest = body;

    while let Some(pos) = rest.find('\\') {
        out.push_str(&rest[..pos]);
        let tail = &rest[pos + 1..];
        match decode_rust_escape(tail) {
            Some((decoded, used)) => {
                out.extend(decoded);
                rest = &tail[used..];
            }
            None => {
                // Unknown or malformed escape: keep the backslash and let the
                // following characters through unchanged.
                out.push('\\');
                rest = tail;
            }
        }
    }

    out.push_str(rest);
    out
}

/// Decodes one escape; `tail` is the text right after the backslash.
///
/// Returns the produced character (`None` for a line continuation, which
/// produces nothing) and how many bytes of `tail` the escape occupies, or
/// `None` when the escape is not recognised.
fn decode_rust_escape(tail: &str) -> Option<(Option<char>, usize)> {
    let first = tail.chars().next()?;
    let simple = |ch: char| Some((Some(ch), 1));

    match first {
        'n' => simple('\n'),
        'r' => simple('\r'),
        't' => simple('\t'),
        '0' => simple('\0'),
        '\\' => simple('\\'),
        '"' => simple('"'),
        '\'' => simple('\''),
        'x' => {
            let hex = tail.get(1..3)?;
            if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
                return None;
            }
            let value = u8::from_str_radix(hex, 16).ok()?;
            if value > 0x7F {
                return None;
            }
            Some((Some(char::from(value)), 3))
        }
        'u' => {
            let inner = tail.strip_prefix("u{")?;
            let close = inner.find('}')?;
            // Underscores are allowed as digit separators.
            let digits: String = inner[..close].chars().filter(|c| *c != '_').collect();
            let well_formed = !digits.is_empty()
                && digits.len() <= 6
                && digits.bytes().all(|b| b.is_ascii_hexdigit());
            if !well_formed {
                return None;
            }
            let value = u32::from_str_radix(&digits, 16).ok()?;
            let ch = char::from_u32(value)?;
            Some((Some(ch), 2 + close + 1))
        }
        '\n' | '\r' => {
            if first == '\r' && tail.as_bytes().get(1) != Some(&b'\n') {
                return None;
            }
            // Line continuation: the newline and the whitespace that follows
            // it are dropped from the string.
            let kept = tail.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'));
            Some((None, tail.len() - kept.len()))
        }
        _ => None,
    }
}

/// Converts a byte offset into a 1-based `(line, column)` pair.
///
/// Lines are separated by `\n`; the column counts characters (not bytes)
/// since the start of the line. An offset past the end is clamped to the end
/// of `content`, and an offset that falls inside a multi-byte character is
/// moved back to the start of that character instead of panicking.
fn line_col_from_offset(content: &str, offset: usize) -> (usize, usize) {
    let mut end = offset.min(content.len());
    // Offset 0 is always a boundary, so this loop terminates.
    while !content.is_char_boundary(end) {
        end -= 1;
    }

    let prefix = &content[..end];
    let line = prefix.bytes().filter(|byte| *byte == b'\n').count() + 1;
    // Only the current line is counted; no work is spent on earlier lines.
    let line_start = prefix.rfind('\n').map_or(0, |pos| pos + 1);
    let col = prefix[line_start..].chars().count() + 1;
    (line, col)
}

/// Returns the index of the first byte at or after `pos` that is not ASCII
/// whitespace; `bytes.len()` when only whitespace remains, and `pos` itself
/// when it already lies beyond the end.
fn skip_ws(bytes: &[u8], pos: usize) -> usize {
    match bytes.get(pos..) {
        Some(rest) => {
            let blanks = rest
                .iter()
                .take_while(|byte| byte.is_ascii_whitespace())
                .count();
            pos + blanks
        }
        None => pos,
    }
}

/// Returns `true` for a byte that may begin an identifier: an ASCII letter or `_`.
fn is_identifier_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}

/// Returns `true` for a byte that may continue an identifier: an ASCII letter,
/// an ASCII digit or `_`.
fn is_identifier_continue(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_')
}