search-semantically 0.1.10

Embeddable semantic code search with multi-signal POEM ranking
Documentation
/// Coarse category that [`classify_query`] assigns to a search query.
///
/// The enum is a plain tag with no payload, so it is `Copy` and can be used
/// directly as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum QueryType {
    /// A single word that is not path-like, or a phrase of at most three
    /// words in which some word looks like code (`camelCase`, `snake_case`
    /// or `SCREAMING_SNAKE`).
    Identifier,
    /// Empty or whitespace-only input, and every query that matches none of
    /// the other categories.
    NaturalLanguage,
    /// A query containing `/` or `\`, a dotted path of three or more
    /// segments, or a single word ending in a short file extension.
    PathLike,
}

/// Returns `true` when `s` contains an ASCII lowercase letter immediately
/// followed by an ASCII uppercase letter (the "hump" of `camelCase`).
///
/// The scan works on raw bytes and only ASCII letters are recognised, so
/// non-ASCII characters never contribute to a match. Strings shorter than
/// two bytes yield `false`.
fn has_camel_case(s: &str) -> bool {
    s.as_bytes()
        .windows(2)
        .any(|pair| pair[0].is_ascii_lowercase() && pair[1].is_ascii_uppercase())
}

/// Returns `true` when `s` contains an underscore and at least one
/// alphanumeric character.
///
/// The alphanumeric character may appear anywhere in the string; a string
/// made only of underscores is rejected.
fn has_snake_case(s: &str) -> bool {
    if !s.contains('_') {
        return false;
    }
    s.chars().any(char::is_alphanumeric)
}

/// Returns `true` when `s` is written in `SCREAMING_SNAKE_CASE`.
///
/// A match requires all of the following:
/// - every character is an ASCII uppercase letter, an ASCII digit or `_`;
/// - at least one `_` is present;
/// - at least one ASCII uppercase letter is present.
///
/// The last condition keeps strings with no letters at all, such as `"_"`,
/// `"___"` or `"1_2"`, from being reported as screaming snake case.
fn is_screaming_snake(s: &str) -> bool {
    let mut has_underscore = false;
    let mut has_upper = false;
    for c in s.chars() {
        match c {
            '_' => has_underscore = true,
            'A'..='Z' => has_upper = true,
            '0'..='9' => {}
            // Any other character (lowercase, punctuation, non-ASCII) disqualifies.
            _ => return false,
        }
    }
    has_underscore && has_upper
}

/// Classifies a raw search query as a path, an identifier or natural language.
///
/// Leading and trailing whitespace is ignored. The rules are applied in
/// order and the first match wins:
///
/// 1. Empty input is [`QueryType::NaturalLanguage`].
/// 2. Anything containing `/` or `\`, or forming a dotted path of three or
///    more segments, is [`QueryType::PathLike`].
/// 3. A single word ending in a short file extension is
///    [`QueryType::PathLike`]; any other single word is
///    [`QueryType::Identifier`].
/// 4. A phrase of at most three words where some word looks like code
///    (camelCase, snake_case or SCREAMING_SNAKE) is [`QueryType::Identifier`].
/// 5. Everything else is [`QueryType::NaturalLanguage`].
pub fn classify_query(query: &str) -> QueryType {
    let query = query.trim();
    if query.is_empty() {
        return QueryType::NaturalLanguage;
    }

    let has_separator = query.contains(|c: char| matches!(c, '/' | '\\'));
    if has_separator || has_dotted_path(query) {
        return QueryType::PathLike;
    }

    let tokens: Vec<&str> = query.split_whitespace().collect();
    match tokens.as_slice() {
        // With exactly one token the trimmed query *is* that token.
        [_] if has_file_extension(query) => QueryType::PathLike,
        [_] => QueryType::Identifier,
        phrase
            if phrase.len() <= 3
                && phrase.iter().any(|token| {
                    has_camel_case(token) || has_snake_case(token) || is_screaming_snake(token)
                }) =>
        {
            QueryType::Identifier
        }
        _ => QueryType::NaturalLanguage,
    }
}

/// Returns `true` when `s` is a dotted path such as `foo.bar.baz`.
///
/// The string is split on `.`; it qualifies when there are at least three
/// segments and every segment is non-empty and made only of alphanumeric
/// characters or `_`.
fn has_dotted_path(s: &str) -> bool {
    let mut segment_count = 0usize;
    for segment in s.split('.') {
        let well_formed =
            !segment.is_empty() && segment.chars().all(|c| c == '_' || c.is_alphanumeric());
        if !well_formed {
            return false;
        }
        segment_count += 1;
    }
    segment_count >= 3
}

/// Returns `true` when `s` ends in something that looks like a file extension.
///
/// The extension is the text after the last `.`. It must be between one and
/// five bytes long and consist only of alphanumeric characters. Strings
/// without a `.` never match.
fn has_file_extension(s: &str) -> bool {
    match s.rsplit_once('.') {
        Some((_, ext)) => {
            let plausible_length = (1..=5).contains(&ext.len());
            plausible_length && ext.chars().all(char::is_alphanumeric)
        }
        None => false,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `classify_query` on `query` and compares the result to `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test instead of
    /// this helper.
    #[track_caller]
    fn assert_classified(query: &str, expected: QueryType) {
        assert_eq!(classify_query(query), expected);
    }

    // Empty and blank input.

    #[test]
    fn empty_query_classified_as_natural_language() {
        assert_classified("", QueryType::NaturalLanguage);
    }

    #[test]
    fn whitespace_only_query_classified_as_natural_language() {
        assert_classified("   ", QueryType::NaturalLanguage);
    }

    // Single words are identifiers regardless of casing.

    #[test]
    fn single_camel_case_word_is_identifier() {
        assert_classified("myFunction", QueryType::Identifier);
    }

    #[test]
    fn single_snake_case_word_is_identifier() {
        assert_classified("my_function", QueryType::Identifier);
    }

    #[test]
    fn single_lowercase_word_is_identifier() {
        assert_classified("search", QueryType::Identifier);
    }

    #[test]
    fn single_uppercase_word_is_identifier() {
        assert_classified("Search", QueryType::Identifier);
    }

    // Path-like queries: separators, dotted paths and file names.

    #[test]
    fn path_with_slash_is_path_like() {
        assert_classified("src/tools/mod.rs", QueryType::PathLike);
    }

    #[test]
    fn path_with_backslash_is_path_like() {
        assert_classified("src\\tools\\mod.rs", QueryType::PathLike);
    }

    #[test]
    fn dotted_path_three_segments_is_path_like() {
        assert_classified("foo.bar.baz", QueryType::PathLike);
    }

    #[test]
    fn filename_with_extension_is_path_like() {
        assert_classified("config.yaml", QueryType::PathLike);
    }

    // Short phrases that contain a code-looking word.

    #[test]
    fn screaming_snake_case_is_identifier() {
        assert_classified("MAX_SIZE DEFAULT", QueryType::Identifier);
    }

    #[test]
    fn short_camel_case_phrase_is_identifier() {
        assert_classified("myFunction handles input", QueryType::Identifier);
    }

    // Plain-language phrases.

    #[test]
    fn natural_language_sentence_is_natural_language() {
        assert_classified(
            "find all places where database connections are established",
            QueryType::NaturalLanguage,
        );
    }

    #[test]
    fn short_mixed_words_without_code_patterns_is_natural_language() {
        assert_classified("how does this work", QueryType::NaturalLanguage);
    }

    #[test]
    fn four_words_without_code_patterns_is_natural_language() {
        assert_classified("find the error handler function", QueryType::NaturalLanguage);
    }
}