Skip to main content

lance_graph/
case_insensitive.rs

1//! Case-insensitive string utilities
2//!
3//! We use case-insensitive identifiers throughout to provide a SQL-like,
4//! forgiving user experience.
5//!
6//! All identifiers (labels, properties, relationships) are case-insensitive,
7//! meaning `Person`, `PERSON`, and `person` all refer to the same entity.
8
9use std::collections::HashMap;
10use std::hash::{Hash, Hasher};
11
/// A string wrapper that performs case-insensitive comparisons and hashing.
///
/// The input is lowercased once, at construction, and only that normalized
/// form is stored; the original casing is not retained. Equality and hashing
/// are therefore plain string operations on the normalized form, which keeps
/// `Eq` and `Hash` consistent with each other.
///
/// # Examples
///
/// ```
/// use lance_graph::case_insensitive::CaseInsensitiveStr;
///
/// let a = CaseInsensitiveStr::new("Person");
/// let b = CaseInsensitiveStr::new("person");
/// let c = CaseInsensitiveStr::new("PERSON");
///
/// assert_eq!(a, b);
/// assert_eq!(b, c);
/// assert_eq!(a.as_str(), "person");
/// ```
#[derive(Debug, Clone)]
pub struct CaseInsensitiveStr {
    /// Lowercased form of the input. The field is private and only ever set
    /// by [`CaseInsensitiveStr::new`], so it is always lowercase.
    normalized: String,
}

impl CaseInsensitiveStr {
    /// Create a new case-insensitive string from any string-like type.
    ///
    /// The input is converted to lowercase immediately. ASCII-only input is
    /// lowercased in place, reusing the buffer produced by `s.into()`; any
    /// other input goes through [`str::to_lowercase`]. Both paths yield the
    /// same result as calling `to_lowercase` on the input.
    pub fn new(s: impl Into<String>) -> Self {
        let mut normalized: String = s.into();
        if normalized.is_ascii() {
            // For ASCII-only text this is identical to `to_lowercase`, but it
            // avoids allocating a second string.
            normalized.make_ascii_lowercase();
        } else {
            normalized = normalized.to_lowercase();
        }
        Self { normalized }
    }

    /// Get the normalized (lowercase) string representation.
    pub fn as_str(&self) -> &str {
        self.normalized.as_str()
    }
}

/// Two values are equal when their lowercase forms are equal.
impl PartialEq for CaseInsensitiveStr {
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}

impl Eq for CaseInsensitiveStr {}

/// Hashes the lowercase form, so values that compare equal hash identically.
impl Hash for CaseInsensitiveStr {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.normalized.hash(state);
    }
}

impl From<&str> for CaseInsensitiveStr {
    fn from(s: &str) -> Self {
        Self::new(s)
    }
}

impl From<String> for CaseInsensitiveStr {
    fn from(s: String) -> Self {
        Self::new(s)
    }
}

/// Borrows the normalized (lowercase) form, same as [`CaseInsensitiveStr::as_str`].
impl AsRef<str> for CaseInsensitiveStr {
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}

/// Case-insensitive `HashMap` type alias for convenience.
///
/// Keys are [`CaseInsensitiveStr`] values, so inserting or looking up
/// `"Person"`, `"person"` and `"PERSON"` all address the same entry.
pub type CaseInsensitiveMap<V> = HashMap<CaseInsensitiveStr, V>;
87
/// Create a qualified column name for internal DataFusion operations.
///
/// Returns the format `alias__property` (e.g., "p__name"): the lowercased
/// alias, a double underscore, then the lowercased property. Lowercasing both
/// parts is what makes the resulting name case-insensitive.
///
/// This is the central utility for creating qualified column names throughout
/// the codebase. All join keys, scan projections, and expression translations
/// should use this function to ensure consistent case-insensitive behavior.
///
/// # Examples
///
/// ```
/// use lance_graph::case_insensitive::qualify_column;
///
/// assert_eq!(qualify_column("Person", "Name"), "person__name");
/// assert_eq!(qualify_column("p", "fullName"), "p__fullname");
/// ```
#[inline]
pub fn qualify_column(alias: &str, property: &str) -> String {
    const SEPARATOR: &str = "__";

    let property = property.to_lowercase();
    // Build on top of the lowercased alias instead of formatting into a
    // third, freshly grown string.
    let mut qualified = alias.to_lowercase();
    qualified.reserve(SEPARATOR.len() + property.len());
    qualified.push_str(SEPARATOR);
    qualified.push_str(&property);
    qualified
}
109
/// Helper trait for case-insensitive lookups on standard `HashMap<String, V>`.
///
/// This trait provides extension methods for performing case-insensitive
/// lookups on existing String-keyed HashMaps without requiring migration
/// to CaseInsensitiveMap.
///
/// # Matching rules
///
/// Every method first tries the key exactly as given. Only when that misses
/// does it scan the map for a key whose lowercase form equals the lowercase
/// form of the requested key. If several stored keys differ only by case and
/// none matches exactly, the entry returned is whichever one the map's
/// iteration reaches first.
///
/// # Performance
///
/// - **Best case**: O(1) when exact case matches (uses HashMap's fast path)
/// - **Worst case**: O(n) when case differs (iterates all keys to find match)
///
/// **For hot paths** (executed per-row or frequently), store normalized (lowercase)
/// keys in the HashMap to guarantee O(1) lookups. This trait is most appropriate for:
/// - Small HashMaps (< 100 entries)
/// - Cold paths (planning phase, executed once per query)
/// - Cases where preserving original case in keys is important
///
/// See `SemanticAnalyzer::variables` for an example of the optimized pattern where
/// keys are normalized to lowercase on insertion, ensuring all lookups hit the O(1) fast path.
///
/// # Examples
///
/// ```
/// use std::collections::HashMap;
/// use lance_graph::case_insensitive::CaseInsensitiveLookup;
///
/// let mut map = HashMap::new();
/// map.insert("Person".to_string(), 1);
///
/// assert_eq!(map.get_ci("person"), Some(&1));
/// assert_eq!(map.get_ci("PERSON"), Some(&1));
/// assert_eq!(map.get_ci("Person"), Some(&1));
/// ```
pub trait CaseInsensitiveLookup<V> {
    /// Get a value with case-insensitive key lookup.
    ///
    /// Returns `Some(&V)` if a key matches (case-insensitively), `None` otherwise.
    fn get_ci(&self, key: &str) -> Option<&V>;

    /// Check if a key exists with case-insensitive lookup.
    fn contains_key_ci(&self, key: &str) -> bool;

    /// Get a mutable reference with case-insensitive key lookup.
    ///
    /// Resolves the key the same way as [`CaseInsensitiveLookup::get_ci`].
    fn get_mut_ci(&mut self, key: &str) -> Option<&mut V>;
}

impl<V> CaseInsensitiveLookup<V> for HashMap<String, V> {
    fn get_ci(&self, key: &str) -> Option<&V> {
        // Exact match first: O(1), and it makes the result deterministic when
        // several stored keys differ only by case.
        self.get(key).or_else(|| {
            let key_lower = key.to_lowercase();
            self.iter()
                .find(|(k, _)| k.to_lowercase() == key_lower)
                .map(|(_, v)| v)
        })
    }

    fn contains_key_ci(&self, key: &str) -> bool {
        self.get_ci(key).is_some()
    }

    fn get_mut_ci(&mut self, key: &str) -> Option<&mut V> {
        // Mirror `get_ci`: an exact-case hit wins, so both methods address the
        // same entry. `contains_key` + `get_mut` (rather than `if let` on
        // `get_mut`) keeps the mutable borrow confined to the returning branch.
        if self.contains_key(key) {
            return self.get_mut(key);
        }

        // Fall back to a case-insensitive scan; `iter_mut` hands out the value
        // directly, so the matching key does not need to be cloned.
        let key_lower = key.to_lowercase();
        self.iter_mut()
            .find(|(k, _)| k.to_lowercase() == key_lower)
            .map(|(_, v)| v)
    }
}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// Spellings of "person" that must all be treated as the same identifier.
    const PERSON_VARIANTS: [&str; 4] = ["Person", "person", "PERSON", "PeRsOn"];

    #[test]
    fn test_case_insensitive_str_equality() {
        let reference = CaseInsensitiveStr::new("person");
        for variant in PERSON_VARIANTS {
            assert_eq!(
                CaseInsensitiveStr::new(variant),
                reference,
                "variant {:?}",
                variant
            );
        }
    }

    #[test]
    fn test_case_insensitive_str_inequality() {
        let a = CaseInsensitiveStr::new("Person");
        let b = CaseInsensitiveStr::new("Company");

        assert_ne!(a, b);
    }

    #[test]
    fn test_case_insensitive_str_hash() {
        use std::collections::HashSet;

        let mut set = HashSet::new();
        set.insert(CaseInsensitiveStr::new("Person"));

        // All variations should be found
        for variant in PERSON_VARIANTS {
            assert!(
                set.contains(&CaseInsensitiveStr::new(variant)),
                "variant {:?}",
                variant
            );
        }

        // Different value should not be found
        assert!(!set.contains(&CaseInsensitiveStr::new("Company")));
    }

    #[test]
    fn test_case_insensitive_map() {
        let mut map: CaseInsensitiveMap<i32> = HashMap::new();
        map.insert(CaseInsensitiveStr::new("Person"), 1);
        map.insert(CaseInsensitiveStr::new("Company"), 2);

        for variant in PERSON_VARIANTS {
            assert_eq!(map.get(&CaseInsensitiveStr::new(variant)), Some(&1));
        }
        for variant in ["company", "COMPANY"] {
            assert_eq!(map.get(&CaseInsensitiveStr::new(variant)), Some(&2));
        }

        assert_eq!(map.get(&CaseInsensitiveStr::new("Unknown")), None);
    }

    #[test]
    fn test_case_insensitive_lookup_trait() {
        let mut map = HashMap::new();
        map.insert("Person".to_string(), 1);
        map.insert("Company".to_string(), 2);
        map.insert("fullName".to_string(), 3);

        // Test get_ci
        for variant in PERSON_VARIANTS {
            assert_eq!(map.get_ci(variant), Some(&1));
        }
        for variant in ["company", "COMPANY"] {
            assert_eq!(map.get_ci(variant), Some(&2));
        }
        for variant in ["fullname", "FULLNAME", "FullName"] {
            assert_eq!(map.get_ci(variant), Some(&3));
        }
        assert_eq!(map.get_ci("Unknown"), None);

        // Test contains_key_ci
        assert!(map.contains_key_ci("person"));
        assert!(map.contains_key_ci("COMPANY"));
        assert!(map.contains_key_ci("FullName"));
        assert!(!map.contains_key_ci("Unknown"));
    }

    #[test]
    fn test_case_insensitive_lookup_exact_match_fast_path() {
        let mut single = HashMap::new();
        single.insert("Person".to_string(), 1);

        // With a single key, exact and case-insensitive lookups agree.
        assert_eq!(single.get_ci("Person"), Some(&1));
        assert_eq!(single.get_ci("person"), Some(&1));
        assert_eq!(single.get_ci("PERSON"), Some(&1));

        // With keys that differ only by case, an exact-case hit must win over
        // the fallback scan; this is what actually distinguishes the fast path.
        let mut colliding = HashMap::new();
        colliding.insert("Person".to_string(), 1);
        colliding.insert("person".to_string(), 2);

        assert_eq!(colliding.get_ci("Person"), Some(&1));
        assert_eq!(colliding.get_ci("person"), Some(&2));
        // No exact match: either entry is acceptable, but one must be found.
        assert!(colliding.get_ci("PERSON").is_some());
    }

    #[test]
    fn test_case_insensitive_str_as_str() {
        let s = CaseInsensitiveStr::new("Person");
        assert_eq!(s.as_str(), "person"); // Stored as lowercase
    }

    #[test]
    fn test_case_insensitive_str_as_ref() {
        let s = CaseInsensitiveStr::new("Person");
        let borrowed: &str = s.as_ref();
        assert_eq!(borrowed, "person");
    }

    #[test]
    fn test_case_insensitive_str_non_ascii() {
        // Lowercasing is Unicode-aware, not ASCII-only.
        let upper = CaseInsensitiveStr::new("ÉCOLE");
        let lower = CaseInsensitiveStr::new("école");

        assert_eq!(upper, lower);
        assert_eq!(upper.as_str(), "école");
    }

    #[test]
    fn test_case_insensitive_str_from_string() {
        let s = String::from("Person");
        let ci_str: CaseInsensitiveStr = s.into();
        assert_eq!(ci_str.as_str(), "person");
    }

    #[test]
    fn test_case_insensitive_str_from_str() {
        let ci_str: CaseInsensitiveStr = "Person".into();
        assert_eq!(ci_str.as_str(), "person");
    }

    #[test]
    fn test_case_insensitive_map_insertion_deduplication() {
        let mut map: CaseInsensitiveMap<i32> = HashMap::new();

        // Insert with different cases - should overwrite
        map.insert(CaseInsensitiveStr::new("Person"), 1);
        map.insert(CaseInsensitiveStr::new("person"), 2);
        map.insert(CaseInsensitiveStr::new("PERSON"), 3);

        // Should have only one entry with the latest value
        assert_eq!(map.len(), 1);
        assert_eq!(map.get(&CaseInsensitiveStr::new("person")), Some(&3));
    }

    #[test]
    fn test_get_mut_ci() {
        let mut map = HashMap::new();
        map.insert("Person".to_string(), 1);
        map.insert("Company".to_string(), 2);

        // Test mutable access with different cases
        if let Some(v) = map.get_mut_ci("person") {
            *v = 10;
        }
        assert_eq!(map.get_ci("Person"), Some(&10));

        if let Some(v) = map.get_mut_ci("COMPANY") {
            *v = 20;
        }
        assert_eq!(map.get_ci("company"), Some(&20));

        // Non-existent key
        assert!(map.get_mut_ci("Unknown").is_none());
    }

    #[test]
    fn test_property_name_normalization() {
        // Test realistic property names from Issue #105
        let mut map = HashMap::new();
        map.insert("fullName".to_string(), 1);
        map.insert("isActive".to_string(), 2);
        map.insert("numFollowers".to_string(), 3);

        // All variations should work
        let cases = [
            (["fullname", "FULLNAME", "FullName"], 1),
            (["isactive", "ISACTIVE", "IsActive"], 2),
            (["numfollowers", "NUMFOLLOWERS", "NumFollowers"], 3),
        ];
        for (variants, expected) in cases {
            for variant in variants {
                assert_eq!(map.get_ci(variant), Some(&expected), "key {:?}", variant);
            }
        }
    }

    #[test]
    fn test_qualify_column() {
        // Basic usage
        assert_eq!(qualify_column("p", "name"), "p__name");
        assert_eq!(qualify_column("person", "age"), "person__age");

        // Case normalization
        assert_eq!(qualify_column("P", "Name"), "p__name");
        assert_eq!(qualify_column("PERSON", "AGE"), "person__age");
        assert_eq!(qualify_column("Person", "fullName"), "person__fullname");

        // Mixed case
        assert_eq!(qualify_column("MyVar", "IsActive"), "myvar__isactive");
        assert_eq!(qualify_column("a", "NumFollowers"), "a__numfollowers");

        // Empty parts still produce the separator
        assert_eq!(qualify_column("", ""), "__");
    }
}