hamelin_translation 0.7.9

Lowering and IR for the Hamelin query language
Documentation
//! Unique name generation for normalization passes.

use std::collections::HashSet;
use std::sync::Arc;

use hamelin_lib::tree::{
    ast::identifier::{Identifier, SimpleIdentifier},
    typed_ast::{environment::TypeEnvironment, query::TypedStatement},
};

/// Trait for types that can check if a name exists.
///
/// Used by `UniqueNameGenerator` to avoid generating names that collide
/// with existing bindings. `UniqueNameGenerator::next` queries the namespace
/// once per candidate name, so implementations are consulted repeatedly
/// while a free name is being searched for.
///
/// This module provides implementations for `HashSet<String>`,
/// `TypeEnvironment`, `Arc<TypeEnvironment>` and `TypedStatement`.
pub trait HasName {
    /// Check if the given simple identifier exists in this namespace.
    ///
    /// Returns `true` when `id` is already taken (and must therefore not be
    /// handed out as a fresh name), `false` when it is free.
    fn has_name(&self, id: &SimpleIdentifier) -> bool;
}

/// Generator for unique identifiers during normalization.
///
/// Produces names like `__alias_0`, `__alias_1`, etc. for a given prefix.
/// The counter is shared across all calls, ensuring uniqueness within a
/// normalization pass.
///
/// Each generated name has the form `<prefix>_<counter>`. The counter only
/// ever moves forward, so a generator never hands out the same name twice,
/// even when it is queried against different namespaces.
#[derive(Debug)]
pub struct UniqueNameGenerator {
    // Text placed before the `_<n>` suffix of every generated name.
    prefix: &'static str,
    // Numeric suffix that the next candidate name will use.
    counter: usize,
}

impl UniqueNameGenerator {
    /// Build a generator whose names all start with `prefix`.
    ///
    /// Numbering starts at zero, so the first candidate is `<prefix>_0`.
    pub fn new(prefix: &'static str) -> Self {
        Self { counter: 0, prefix }
    }

    /// Produce a fresh identifier that is not already present in `namespace`.
    ///
    /// Candidates have the form `<prefix>_<n>`. Every candidate that is tried
    /// consumes one counter value, whether or not it is accepted, so rejected
    /// suffixes are never revisited by later calls.
    pub fn next(&mut self, namespace: &impl HasName) -> SimpleIdentifier {
        loop {
            // Claim the current suffix up front and advance for the next attempt.
            let index = self.counter;
            self.counter += 1;

            let candidate = SimpleIdentifier::new(format!("{}_{}", self.prefix, index));
            if namespace.has_name(&candidate) {
                // Already taken in this namespace: try the following suffix.
                continue;
            }
            return candidate;
        }
    }
}

// ============================================================================
// HasName implementations
// ============================================================================

/// A plain set of names: an identifier is taken when its text is a member.
impl HasName for HashSet<String> {
    fn has_name(&self, id: &SimpleIdentifier) -> bool {
        // Look up by borrowed `&str` so no `String` is allocated for the probe.
        let wanted = id.as_str();
        self.contains(wanted)
    }
}

/// A type environment: an identifier is taken when `lookup` finds an entry for it.
impl HasName for TypeEnvironment {
    fn has_name(&self, id: &SimpleIdentifier) -> bool {
        // `lookup` takes a different key type than `SimpleIdentifier`, so
        // convert an owned copy first.
        let key = id.clone().into();
        self.lookup(&key).is_some()
    }
}

/// Shared type environment: forwards to the `TypeEnvironment` implementation.
impl HasName for Arc<TypeEnvironment> {
    fn has_name(&self, id: &SimpleIdentifier) -> bool {
        // Deref through `&Arc<_>` to reach the inner environment.
        (**self).has_name(id)
    }
}

/// A typed statement: an identifier is taken when one of the statement's
/// `with_clauses` is named by exactly that simple identifier.
impl HasName for TypedStatement {
    fn has_name(&self, id: &SimpleIdentifier) -> bool {
        for clause in self.with_clauses.iter() {
            // Clauses for which `valid_ref` yields no name, or whose name is
            // not a simple identifier, never count as a match.
            let is_match = clause
                .name
                .valid_ref()
                .map(|bound| matches!(bound, Identifier::Simple(simple) if simple == id))
                .unwrap_or(false);
            if is_match {
                return true;
            }
        }
        false
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use hamelin_lib::types::INT;

    /// With nothing to collide against, suffixes count up from zero.
    #[test]
    fn test_generates_sequential_names() {
        let schema = TypeEnvironment::default();
        let mut generator = UniqueNameGenerator::new("__test");

        for expected in &["__test_0", "__test_1", "__test_2"] {
            assert_eq!(generator.next(&schema).as_str(), *expected);
        }
    }

    /// Two generators keep separate counters even when used interleaved.
    #[test]
    fn test_different_prefixes_are_independent() {
        let schema = TypeEnvironment::default();
        let mut aliases = UniqueNameGenerator::new("__alias");
        let mut froms = UniqueNameGenerator::new("__from");

        // Draw names in alternating order, then check them all.
        let alias_first = aliases.next(&schema);
        let from_first = froms.next(&schema);
        let alias_second = aliases.next(&schema);
        let from_second = froms.next(&schema);

        assert_eq!(alias_first.as_str(), "__alias_0");
        assert_eq!(from_first.as_str(), "__from_0");
        assert_eq!(alias_second.as_str(), "__alias_1");
        assert_eq!(from_second.as_str(), "__from_1");
    }

    /// Names already bound in the schema are skipped rather than returned.
    #[test]
    fn test_skips_existing_names_in_schema() {
        // Schema in which `__test_0` and `__test_2` are already bound.
        let schema = TypeEnvironment::default()
            .with_str("__test_0", INT.clone())
            .with_str("__test_2", INT.clone());
        let mut generator = UniqueNameGenerator::new("__test");

        // `__test_0` is skipped -> `__test_1`; `__test_2` is skipped ->
        // `__test_3`; `__test_4` is free and returned directly.
        for expected in &["__test_1", "__test_3", "__test_4"] {
            assert_eq!(generator.next(&schema).as_str(), *expected);
        }
    }
}