// forgekit-core 0.5.0
//
// Deterministic code intelligence SDK - Core library
// Documentation
use std::path::Path;

pub(crate) fn language_from_extension(path: &Path) -> crate::types::Language {
    match path.extension().and_then(|e| e.to_str()) {
        Some("rs") => crate::types::Language::Rust,
        Some("py") => crate::types::Language::Python,
        Some("java") => crate::types::Language::Java,
        Some("ts") | Some("tsx") => crate::types::Language::TypeScript,
        Some("js") | Some("jsx") => crate::types::Language::JavaScript,
        Some("c") | Some("h") => crate::types::Language::C,
        Some("cpp") | Some("cc") | Some("cxx") | Some("hpp") => crate::types::Language::Cpp,
        Some("go") => crate::types::Language::Go,
        _ => crate::types::Language::Unknown("".to_string()),
    }
}

/// Finds every whole-identifier occurrence of `name` inside `content`.
///
/// An occurrence counts only when the bytes immediately before and after it
/// are not identifier characters for `lang` (as decided by `is_ident_char`),
/// or when it touches the start / end of `content`. Occurrences written
/// behind one of the language's qualifying prefixes (see
/// `qualified_prefixes`, e.g. `self.` or `this->`) are matched explicitly as
/// well; the returned span still covers only the name, never the prefix.
///
/// # Returns
///
/// Half-open byte ranges `(start, end)` into `content`, sorted ascending,
/// with each occurrence reported exactly once. The result is empty when
/// `name` is empty or longer than `content`.
pub(crate) fn identifier_spans(
    content: &[u8],
    name: &str,
    lang: crate::types::Language,
) -> Vec<(usize, usize)> {
    let name_bytes = name.as_bytes();
    let name_len = name_bytes.len();
    let content_len = content.len();
    let mut spans = Vec::new();
    if name_len == 0 || content_len < name_len {
        return spans;
    }

    // Build each `prefix + name` needle once up front instead of formatting a
    // fresh String for every prefix at every byte offset of the scan.
    let qualified: Vec<(usize, Vec<u8>)> = qualified_prefixes(&lang)
        .into_iter()
        .map(|prefix| (prefix.len(), [prefix.as_bytes(), name_bytes].concat()))
        .collect();

    // True when the `name_len` bytes starting at `start` are not glued to an
    // identifier character on either side. Callers guarantee that
    // `start + name_len <= content_len`.
    let is_whole_ident = |start: usize| {
        let end = start + name_len;
        let before_ok = start == 0 || !is_ident_char(content[start - 1], &lang);
        let after_ok = end == content_len || !is_ident_char(content[end], &lang);
        before_ok && after_ok
    };

    // `content_len >= name_len` is guaranteed by the early return above, so
    // the subtraction cannot underflow.
    for i in 0..=content_len - name_len {
        let rest = &content[i..];

        if rest.starts_with(name_bytes) && is_whole_ident(i) {
            spans.push((i, i + name_len));
        }

        for (prefix_len, needle) in &qualified {
            let start = i + prefix_len;
            if rest.starts_with(needle) && is_whole_ident(start) {
                spans.push((start, start + name_len));
            }
        }
    }

    // A qualified occurrence is seen twice: once from the prefix position and
    // once more when the scan reaches the name itself. Sorting then removing
    // adjacent duplicates reports each span once, in ascending order.
    spans.sort_unstable();
    spans.dedup();
    spans
}

fn qualified_prefixes(lang: &crate::types::Language) -> Vec<&'static str> {
    match lang {
        crate::types::Language::Rust => vec!["self.", "crate::", "super::"],
        crate::types::Language::Python => vec!["self.", "cls."],
        crate::types::Language::Java => vec!["this."],
        crate::types::Language::TypeScript => vec!["this."],
        crate::types::Language::JavaScript => vec!["this."],
        crate::types::Language::C => vec!["struct ", "enum "],
        crate::types::Language::Cpp => vec!["this->", "struct ", "enum ", "class "],
        crate::types::Language::Go => vec![],
        crate::types::Language::Unknown(_) => vec![],
    }
}

fn is_ident_char(b: u8, lang: &crate::types::Language) -> bool {
    match lang {
        crate::types::Language::Rust => b.is_ascii_alphanumeric() || b == b'_',
        crate::types::Language::Python => b.is_ascii_alphanumeric() || b == b'_',
        crate::types::Language::Java => b.is_ascii_alphanumeric() || b == b'_',
        crate::types::Language::TypeScript => b.is_ascii_alphanumeric() || b == b'_' || b == b'$',
        crate::types::Language::JavaScript => b.is_ascii_alphanumeric() || b == b'_' || b == b'$',
        crate::types::Language::C => b.is_ascii_alphanumeric() || b == b'_',
        crate::types::Language::Cpp => b.is_ascii_alphanumeric() || b == b'_',
        crate::types::Language::Go => b.is_ascii_alphanumeric() || b == b'_',
        crate::types::Language::Unknown(_) => b.is_ascii_alphanumeric() || b == b'_',
    }
}