audb-codegen 0.1.11

//! Naming utilities for code generation
//!
//! This module provides utilities for converting schema names to collection names,
//! pluralizing entity names, and converting between naming conventions.
//!
//! ## Naming Conventions
//!
//! - **Schema names**: PascalCase (e.g., `User`, `BlogPost`)
//! - **Collection names**: Pluralized snake_case (e.g., `users`, `blog_posts`)
//! - **Field names**: snake_case (e.g., `user_id`, `created_at`)
//!
//! ## Examples
//!
//! ```
//! use audb_codegen::naming::{collection_name, pluralize, to_snake_case};
//!
//! assert_eq!(collection_name("User"), "users");
//! assert_eq!(collection_name("BlogPost"), "blog_posts");
//! assert_eq!(pluralize("person"), "people");
//! assert_eq!(to_snake_case("BlogPost"), "blog_post");
//! ```

use std::collections::HashMap;

/// Convert a PascalCase or camelCase string to snake_case
///
/// An underscore is inserted before an uppercase letter (never at the very
/// start of the string) when either:
/// - the previous character is a lowercase letter or an ASCII digit
///   (e.g., `BlogPost` → `blog_post`, `User2FA` → `user2_fa`), or
/// - the previous character is uppercase and the next one is lowercase, i.e.
///   the last capital of an acronym starts a new word
///   (e.g., `HTTPRequest` → `http_request`).
///
/// Every other character is copied through untouched, so strings that are
/// already snake_case are returned unchanged. Uppercase letters are lowercased
/// with Unicode rules, so non-ASCII capitals such as `É` are converted as well.
///
/// # Examples
///
/// ```
/// # use audb_codegen::naming::to_snake_case;
/// assert_eq!(to_snake_case("User"), "user");
/// assert_eq!(to_snake_case("BlogPost"), "blog_post");
/// assert_eq!(to_snake_case("HTTPRequest"), "http_request");
/// assert_eq!(to_snake_case("User2FA"), "user2_fa");
/// assert_eq!(to_snake_case("already_snake_case"), "already_snake_case");
/// assert_eq!(to_snake_case("ÉcoleNormale"), "école_normale");
/// assert_eq!(to_snake_case(""), "");
/// ```
pub fn to_snake_case(s: &str) -> String {
    if s.is_empty() {
        return String::new();
    }

    // A few spare bytes for the underscores that get inserted.
    let mut result = String::with_capacity(s.len() + 4);
    let chars: Vec<char> = s.chars().collect();

    for (i, &ch) in chars.iter().enumerate() {
        if !ch.is_uppercase() {
            result.push(ch);
            continue;
        }

        // `i > 0` keeps a leading capital from producing a leading underscore.
        if i > 0 {
            let prev = chars[i - 1];
            let next_is_lower = matches!(chars.get(i + 1), Some(c) if c.is_lowercase());

            // Word boundary: lower/digit → Upper, or the tail of an acronym
            // (Upper, Upper, lower) as in `HTTPRequest`.
            if prev.is_lowercase() || prev.is_ascii_digit() || (prev.is_uppercase() && next_is_lower)
            {
                result.push('_');
            }
        }

        // `is_uppercase` is Unicode-aware, so the lowercasing must be too;
        // `to_ascii_lowercase` would leave capitals such as `É` untouched.
        result.extend(ch.to_lowercase());
    }

    result
}

/// Pluralize an English word using common rules
///
/// The rules are tried in this order and the first match wins (matching is
/// done on the lowercased word):
/// 1. Irregular plurals (person → people, child → children, etc.); the
///    capitalization pattern of the input is carried over to the result
/// 2. Uncountable nouns (data, information, etc.) are returned unchanged
/// 3. Words ending in s, x, z, ch, sh: add `es`
/// 4. Words ending in `y` preceded by anything other than a, e, i, o, u:
///    replace `y` with `ies`
/// 5. Words ending in a known `f`/`fe` → `ves` stem (shelf, wolf, knife, ...):
///    replace `f`/`fe` with `ves`
/// 6. Words ending in a known `o` → `oes` stem (hero, echo, ...): add `es`
/// 7. Everything else: add `s`
///
/// Rules 5 and 6 use allow-lists rather than matching every trailing `f` or
/// consonant + `o`, because most such nouns take a plain `s` (`staff`, `chef`,
/// `roof`, `photo`, `todo`, `logo`). The allow-lists are matched as suffixes so
/// that compound names such as `book_shelf` are covered too.
///
/// Suffixes added by rules 3-7 are always lowercase, whatever the case of the
/// input.
///
/// # Examples
///
/// ```
/// # use audb_codegen::naming::pluralize;
/// // Regular plurals
/// assert_eq!(pluralize("user"), "users");
/// assert_eq!(pluralize("post"), "posts");
///
/// // Words ending in s, x, z, ch, sh
/// assert_eq!(pluralize("box"), "boxes");
/// assert_eq!(pluralize("class"), "classes");
/// assert_eq!(pluralize("church"), "churches");
///
/// // Words ending in consonant + y
/// assert_eq!(pluralize("story"), "stories");
/// assert_eq!(pluralize("category"), "categories");
///
/// // f/fe and o endings
/// assert_eq!(pluralize("shelf"), "shelves");
/// assert_eq!(pluralize("staff"), "staffs");
/// assert_eq!(pluralize("hero"), "heroes");
/// assert_eq!(pluralize("photo"), "photos");
///
/// // Irregular plurals
/// assert_eq!(pluralize("person"), "people");
/// assert_eq!(pluralize("child"), "children");
/// assert_eq!(pluralize("mouse"), "mice");
///
/// // Uncountable nouns
/// assert_eq!(pluralize("data"), "data");
/// assert_eq!(pluralize("information"), "information");
/// ```
pub fn pluralize(word: &str) -> String {
    // Endings whose final `f`/`fe` turns into `ves`.
    const VES_ENDINGS: &[&str] = &[
        "calf", "half", "elf", "wolf", "leaf", "loaf", "sheaf", "thief", "knife", "life", "wife",
    ];
    // Endings that take `oes`; any other word ending in `o` takes a plain `s`.
    const OES_ENDINGS: &[&str] = &[
        "hero", "echo", "veto", "potato", "tomato", "torpedo", "embargo", "cargo", "volcano",
        "mosquito", "tornado", "domino", "buffalo", "mango",
    ];
    // Sibilant endings that need `es` to stay pronounceable.
    const ES_ENDINGS: &[&str] = &["s", "x", "z", "ch", "sh"];

    if word.is_empty() {
        return String::new();
    }

    // All matching is done on the lowercased form; results are built from
    // `word` so the caller's casing of the stem is preserved.
    let lower = word.to_lowercase();

    // Irregular plurals (whole-word lookup)
    let irregulars = get_irregular_plurals();
    if let Some(plural) = irregulars.get(lower.as_str()) {
        return apply_case_pattern(word, plural);
    }

    // Uncountable nouns (already plural or no plural form)
    if is_uncountable(&lower) {
        return word.to_string();
    }

    // Words ending in 's', 'x', 'z', 'ch', 'sh' → add 'es'
    if ES_ENDINGS.iter().any(|ending| lower.ends_with(*ending)) {
        return format!("{}es", word);
    }

    // Words ending in consonant + 'y' → change 'y' to 'ies'
    let mut from_end = lower.chars().rev();
    if let (Some('y'), Some(before_y)) = (from_end.next(), from_end.next()) {
        if !is_vowel(before_y) {
            let stem = &word[..word.len() - 1];
            return format!("{}ies", stem);
        }
    }

    // Known 'f' / 'fe' stems → change to 'ves'
    if VES_ENDINGS.iter().any(|ending| lower.ends_with(*ending)) {
        // Drop "fe" (two bytes) or "f" (one byte) before appending "ves".
        let cut = if lower.ends_with("fe") { 2 } else { 1 };
        let stem = &word[..word.len() - cut];
        return format!("{}ves", stem);
    }

    // Known 'o' stems → add 'es'
    if OES_ENDINGS.iter().any(|ending| lower.ends_with(*ending)) {
        return format!("{}es", word);
    }

    // Default: add 's'
    format!("{}s", word)
}

/// Derive the collection name for a schema
///
/// The schema name is first converted to snake_case and the resulting string
/// is then pluralized as a whole. Irregular and uncountable lookups are
/// therefore made against the entire snake_case string, not just its last
/// segment.
///
/// # Examples
///
/// ```
/// # use audb_codegen::naming::collection_name;
/// assert_eq!(collection_name("User"), "users");
/// assert_eq!(collection_name("BlogPost"), "blog_posts");
/// assert_eq!(collection_name("Person"), "people");
/// assert_eq!(collection_name("Category"), "categories");
/// ```
pub fn collection_name(schema_name: &str) -> String {
    pluralize(&to_snake_case(schema_name))
}

/// Report whether `ch` is one of the lowercase vowels `a`, `e`, `i`, `o`, `u`
///
/// Uppercase letters and `y` are not treated as vowels.
fn is_vowel(ch: char) -> bool {
    ['a', 'e', 'i', 'o', 'u'].contains(&ch)
}

/// Report whether `word` is an uncountable noun
///
/// These are nouns with no distinct plural form, or whose plural is spelled
/// the same as the singular. The comparison is exact and case-sensitive, and
/// the table is lowercase.
fn is_uncountable(word: &str) -> bool {
    const UNCOUNTABLE: &[&str] = &[
        "data",
        "information",
        "equipment",
        "furniture",
        "luggage",
        "software",
        "hardware",
        "feedback",
        "progress",
        "research",
        "evidence",
        "news",
        "series",
        "species",
        "sheep",
        "deer",
        "fish",
    ];

    UNCOUNTABLE.iter().any(|&entry| entry == word)
}

/// Build the lookup table of irregular plurals
///
/// Keys are lowercase singular nouns and values are their lowercase plurals.
/// A fresh map is constructed on every call.
fn get_irregular_plurals() -> HashMap<&'static str, &'static str> {
    const IRREGULAR_PAIRS: &[(&str, &str)] = &[
        // Common irregular plurals
        ("person", "people"),
        ("man", "men"),
        ("woman", "women"),
        ("child", "children"),
        ("tooth", "teeth"),
        ("foot", "feet"),
        ("mouse", "mice"),
        ("goose", "geese"),
        ("ox", "oxen"),
        // f / fe → ves
        ("leaf", "leaves"),
        ("life", "lives"),
        ("wife", "wives"),
        ("knife", "knives"),
        ("half", "halves"),
        ("self", "selves"),
        ("elf", "elves"),
        ("loaf", "loaves"),
        // o → oes
        ("potato", "potatoes"),
        ("tomato", "tomatoes"),
        // Latin us → i
        ("cactus", "cacti"),
        ("focus", "foci"),
        ("fungus", "fungi"),
        ("nucleus", "nuclei"),
        ("radius", "radii"),
        ("alumnus", "alumni"),
        // Greek is → es
        ("analysis", "analyses"),
        ("diagnosis", "diagnoses"),
        ("thesis", "theses"),
        ("crisis", "crises"),
        // on / um → a
        ("phenomenon", "phenomena"),
        ("criterion", "criteria"),
        ("datum", "data"),
    ];

    IRREGULAR_PAIRS.iter().copied().collect()
}

/// Re-case `plural` so it follows the capitalization of `original`
///
/// - `original` empty: `plural` is returned as given
/// - `original` contains no alphabetic character that is not uppercase
///   (e.g., "PERSON"): `plural` is fully uppercased
/// - `original` starts with an uppercase letter (e.g., "Person"): only the
///   first character of `plural` is uppercased
/// - otherwise: `plural` is returned as given
fn apply_case_pattern(original: &str, plural: &str) -> String {
    let leading = match original.chars().next() {
        Some(c) => c,
        None => return plural.to_owned(),
    };

    // Non-alphabetic characters are ignored when deciding "all uppercase".
    let has_non_upper_letter = original
        .chars()
        .any(|c| c.is_alphabetic() && !c.is_uppercase());
    if !has_non_upper_letter {
        return plural.to_uppercase();
    }

    if !leading.is_uppercase() {
        return plural.to_owned();
    }

    // Capitalize just the first character and keep the remainder verbatim.
    let mut rest = plural.chars();
    match rest.next() {
        Some(head) => {
            let mut capitalized: String = head.to_uppercase().collect();
            capitalized.push_str(rest.as_str());
            capitalized
        }
        None => String::new(),
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for the naming helpers, including the private ones.

    use super::*;

    #[test]
    fn test_to_snake_case_simple() {
        assert_eq!(to_snake_case("User"), "user");
        assert_eq!(to_snake_case("BlogPost"), "blog_post");
        assert_eq!(to_snake_case("HTTPRequest"), "http_request");
        assert_eq!(to_snake_case("XMLParser"), "xml_parser");
    }

    #[test]
    fn test_to_snake_case_camel_case() {
        // A leading lowercase letter must not change how later words are split.
        assert_eq!(to_snake_case("blogPost"), "blog_post");
        assert_eq!(to_snake_case("userId"), "user_id");
    }

    #[test]
    fn test_to_snake_case_with_numbers() {
        assert_eq!(to_snake_case("User2FA"), "user2_fa");
        // "OA" followed by a lowercase letter is split like an acronym tail.
        assert_eq!(to_snake_case("OAuth2Client"), "o_auth2_client");
    }

    #[test]
    fn test_to_snake_case_already_snake() {
        assert_eq!(to_snake_case("already_snake_case"), "already_snake_case");
        assert_eq!(to_snake_case("user"), "user");
    }

    #[test]
    fn test_to_snake_case_edge_cases() {
        assert_eq!(to_snake_case(""), "");
        assert_eq!(to_snake_case("A"), "a");
        assert_eq!(to_snake_case("AB"), "ab");
        assert_eq!(to_snake_case("ABC"), "abc");
    }

    #[test]
    fn test_pluralize_regular() {
        assert_eq!(pluralize("user"), "users");
        assert_eq!(pluralize("post"), "posts");
        assert_eq!(pluralize("item"), "items");
        assert_eq!(pluralize("tag"), "tags");
    }

    #[test]
    fn test_pluralize_sibilants() {
        assert_eq!(pluralize("class"), "classes");
        assert_eq!(pluralize("box"), "boxes");
        assert_eq!(pluralize("buzz"), "buzzes");
        assert_eq!(pluralize("church"), "churches");
        assert_eq!(pluralize("dish"), "dishes");
    }

    #[test]
    fn test_pluralize_consonant_y() {
        assert_eq!(pluralize("story"), "stories");
        assert_eq!(pluralize("category"), "categories");
        assert_eq!(pluralize("company"), "companies");
    }

    #[test]
    fn test_pluralize_vowel_y() {
        assert_eq!(pluralize("boy"), "boys");
        assert_eq!(pluralize("day"), "days");
        assert_eq!(pluralize("key"), "keys");
    }

    #[test]
    fn test_pluralize_f_fe() {
        // These four are entries in the irregular table.
        assert_eq!(pluralize("leaf"), "leaves");
        assert_eq!(pluralize("knife"), "knives");
        assert_eq!(pluralize("life"), "lives");
        assert_eq!(pluralize("wife"), "wives");

        // These are NOT in the irregular table, so they exercise the
        // f/fe suffix handling itself.
        assert_eq!(pluralize("shelf"), "shelves");
        assert_eq!(pluralize("calf"), "calves");
        assert_eq!(pluralize("wolf"), "wolves");
        assert_eq!(pluralize("midwife"), "midwives");
    }

    #[test]
    fn test_pluralize_consonant_o() {
        assert_eq!(pluralize("hero"), "heroes");
        assert_eq!(pluralize("echo"), "echoes");
        assert_eq!(pluralize("potato"), "potatoes");
        assert_eq!(pluralize("tomato"), "tomatoes");
    }

    #[test]
    fn test_pluralize_vowel_o() {
        assert_eq!(pluralize("video"), "videos");
        assert_eq!(pluralize("stereo"), "stereos");
    }

    #[test]
    fn test_pluralize_irregular() {
        assert_eq!(pluralize("person"), "people");
        assert_eq!(pluralize("man"), "men");
        assert_eq!(pluralize("woman"), "women");
        assert_eq!(pluralize("child"), "children");
        assert_eq!(pluralize("tooth"), "teeth");
        assert_eq!(pluralize("foot"), "feet");
        assert_eq!(pluralize("mouse"), "mice");
        assert_eq!(pluralize("goose"), "geese");
    }

    #[test]
    fn test_pluralize_irregular_capitalized() {
        assert_eq!(pluralize("Person"), "People");
        assert_eq!(pluralize("Child"), "Children");
        assert_eq!(pluralize("PERSON"), "PEOPLE");
    }

    #[test]
    fn test_pluralize_uncountable() {
        assert_eq!(pluralize("data"), "data");
        assert_eq!(pluralize("information"), "information");
        assert_eq!(pluralize("equipment"), "equipment");
        assert_eq!(pluralize("sheep"), "sheep");
        assert_eq!(pluralize("deer"), "deer");
        assert_eq!(pluralize("fish"), "fish");
    }

    #[test]
    fn test_pluralize_edge_cases() {
        assert_eq!(pluralize(""), "");
        assert_eq!(pluralize("a"), "as");
        assert_eq!(pluralize("I"), "Is");
        // A lone "y" has nothing before it, so the consonant + y rule is skipped.
        assert_eq!(pluralize("y"), "ys");
    }

    #[test]
    fn test_collection_name() {
        assert_eq!(collection_name("User"), "users");
        assert_eq!(collection_name("BlogPost"), "blog_posts");
        assert_eq!(collection_name("Person"), "people");
        assert_eq!(collection_name("Category"), "categories");
        assert_eq!(collection_name("Company"), "companies");
    }

    #[test]
    fn test_collection_name_complex() {
        assert_eq!(collection_name("HTTPRequest"), "http_requests");
        assert_eq!(collection_name("XMLDocument"), "xml_documents");
        assert_eq!(collection_name("OAuth2Client"), "o_auth2_clients");
    }

    #[test]
    fn test_collection_name_uncountable() {
        // Uncountable schema names are only snake_cased, not pluralized.
        assert_eq!(collection_name("Data"), "data");
        assert_eq!(collection_name("Equipment"), "equipment");
    }

    #[test]
    fn test_is_vowel() {
        assert!(is_vowel('a'));
        assert!(is_vowel('e'));
        assert!(is_vowel('i'));
        assert!(is_vowel('o'));
        assert!(is_vowel('u'));
        assert!(!is_vowel('b'));
        assert!(!is_vowel('x'));
    }

    #[test]
    fn test_is_uncountable() {
        assert!(is_uncountable("data"));
        assert!(is_uncountable("information"));
        assert!(is_uncountable("sheep"));
        assert!(!is_uncountable("user"));
        assert!(!is_uncountable("post"));
    }

    #[test]
    fn test_apply_case_pattern() {
        assert_eq!(apply_case_pattern("Person", "people"), "People");
        assert_eq!(apply_case_pattern("person", "people"), "people");
        assert_eq!(apply_case_pattern("PERSON", "people"), "PEOPLE");
        assert_eq!(apply_case_pattern("Child", "children"), "Children");
        // An empty original leaves the plural untouched.
        assert_eq!(apply_case_pattern("", "people"), "people");
    }

    #[test]
    fn test_latin_plurals() {
        assert_eq!(pluralize("cactus"), "cacti");
        assert_eq!(pluralize("focus"), "foci");
        assert_eq!(pluralize("radius"), "radii");
        assert_eq!(pluralize("alumnus"), "alumni");
    }

    #[test]
    fn test_greek_plurals() {
        assert_eq!(pluralize("analysis"), "analyses");
        assert_eq!(pluralize("thesis"), "theses");
        assert_eq!(pluralize("crisis"), "crises");
        assert_eq!(pluralize("phenomenon"), "phenomena");
        assert_eq!(pluralize("criterion"), "criteria");
    }
}