Skip to main content

codelens_core/language/
registry.rs

1//! Language registry for detecting file languages.
2
3use std::collections::HashMap;
4use std::path::Path;
5use std::sync::Arc;
6
7use crate::error::Result;
8
9use super::definition::Language;
10
11/// Built-in language definitions (embedded at compile time).
12const BUILTIN_LANGUAGES: &str = include_str!("../../languages.toml");
13
14/// Registry of known programming languages.
15pub struct LanguageRegistry {
16    /// Extension to language mapping.
17    by_extension: HashMap<String, Arc<Language>>,
18    /// Filename to language mapping.
19    by_filename: HashMap<String, Arc<Language>>,
20    /// Language name to definition.
21    by_name: HashMap<String, Arc<Language>>,
22}
23
24impl LanguageRegistry {
25    /// Create an empty registry.
26    pub fn empty() -> Self {
27        Self {
28            by_extension: HashMap::new(),
29            by_filename: HashMap::new(),
30            by_name: HashMap::new(),
31        }
32    }
33
34    /// Create a registry with built-in language definitions.
35    pub fn with_builtin() -> Result<Self> {
36        let mut registry = Self::empty();
37        registry.load_toml(BUILTIN_LANGUAGES)?;
38        Ok(registry)
39    }
40
41    /// Load language definitions from TOML content.
42    pub fn load_toml(&mut self, content: &str) -> Result<()> {
43        let languages: HashMap<String, Language> = toml::from_str(content)?;
44
45        for (id, mut lang) in languages {
46            // Use the key as name if not specified
47            if lang.name.is_empty() || lang.name == "Unknown" {
48                lang.name = id.clone();
49            }
50
51            let lang = Arc::new(lang);
52
53            // Register by extension
54            for ext in &lang.extensions {
55                let ext = if ext.starts_with('.') {
56                    ext.to_lowercase()
57                } else {
58                    format!(".{}", ext.to_lowercase())
59                };
60                self.by_extension.insert(ext, Arc::clone(&lang));
61            }
62
63            // Register by filename
64            for filename in &lang.filenames {
65                self.by_filename
66                    .insert(filename.to_lowercase(), Arc::clone(&lang));
67            }
68
69            // Register by name
70            self.by_name.insert(lang.name.clone(), Arc::clone(&lang));
71            self.by_name.insert(id.to_lowercase(), Arc::clone(&lang));
72        }
73
74        Ok(())
75    }
76
77    /// Load additional language definitions from a file.
78    pub fn load_file(&mut self, path: &Path) -> Result<()> {
79        let content = std::fs::read_to_string(path).map_err(|e| crate::error::Error::FileRead {
80            path: path.to_path_buf(),
81            source: e,
82        })?;
83        self.load_toml(&content)
84    }
85
86    /// Detect the language of a file by its path.
87    pub fn detect(&self, path: &Path) -> Option<Arc<Language>> {
88        // First, try to match by filename
89        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
90            if let Some(lang) = self.by_filename.get(&filename.to_lowercase()) {
91                return Some(Arc::clone(lang));
92            }
93        }
94
95        // Then, try to match by extension
96        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
97            let ext = format!(".{}", ext.to_lowercase());
98            if let Some(lang) = self.by_extension.get(&ext) {
99                return Some(Arc::clone(lang));
100            }
101        }
102
103        None
104    }
105
106    /// Get a language by name.
107    pub fn get(&self, name: &str) -> Option<Arc<Language>> {
108        self.by_name
109            .get(name)
110            .or_else(|| self.by_name.get(&name.to_lowercase()))
111            .map(Arc::clone)
112    }
113
114    /// Get all registered languages.
115    pub fn all(&self) -> impl Iterator<Item = &Arc<Language>> {
116        self.by_name.values()
117    }
118
119    /// Get the number of registered languages.
120    pub fn len(&self) -> usize {
121        // Count unique languages (by name)
122        self.by_name.len() / 2 // Each language is registered twice (by name and id)
123    }
124
125    /// Check if the registry is empty.
126    pub fn is_empty(&self) -> bool {
127        self.by_name.is_empty()
128    }
129}
130
131impl Default for LanguageRegistry {
132    fn default() -> Self {
133        Self::with_builtin().unwrap_or_else(|_| Self::empty())
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// A known extension resolves to its language.
    #[test]
    fn test_detect_by_extension() {
        let registry = LanguageRegistry::with_builtin().unwrap();

        let detected = registry
            .detect(Path::new("main.rs"))
            .expect("main.rs should be detected");
        assert_eq!(detected.name, "Rust");
    }

    /// A well-known filename without an extension resolves to its language.
    #[test]
    fn test_detect_by_filename() {
        let registry = LanguageRegistry::with_builtin().unwrap();

        let detected = registry
            .detect(Path::new("Makefile"))
            .expect("Makefile should be detected");
        assert_eq!(detected.name, "Makefile");
    }

    /// An extension no language claims yields no match.
    #[test]
    fn test_unknown_extension() {
        let registry = LanguageRegistry::with_builtin().unwrap();

        let detected = registry.detect(Path::new("file.unknown_extension_xyz"));
        assert!(detected.is_none());
    }

    /// Definitions loaded from custom TOML are used for detection.
    #[test]
    fn test_load_custom_language() {
        let toml = r#"
            [mylang]
            name = "MyLang"
            extensions = [".ml"]
            line_comments = [";;"]
        "#;

        let mut registry = LanguageRegistry::empty();
        registry.load_toml(toml).unwrap();

        let detected = registry
            .detect(Path::new("test.ml"))
            .expect("test.ml should be detected");
        assert_eq!(detected.name, "MyLang");
    }
}