Skip to main content

fresh/primitives/
detected_language.rs

1//! Unified language detection for editor buffers.
2//!
3//! This module provides `DetectedLanguage`, the single source of truth for
4//! determining a buffer's language, syntax highlighter, and tree-sitter support.
5//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::highlight_engine::HighlightEngine;
9use crate::primitives::highlighter::Language;
10use crate::primitives::GrammarRegistry;
11use std::collections::HashMap;
12use std::path::Path;
13
14/// The result of language detection — groups the things that must stay in sync
15/// on an `EditorState`: the language ID, display name, highlighting engine, and
16/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
17pub struct DetectedLanguage {
18    /// The canonical language ID for LSP and config lookup (e.g., "csharp", "rust", "text").
19    pub name: String,
20    /// Human-readable display name shown in the status bar and Set Language prompt
21    /// (e.g., "C#", "Rust", "Plain Text"). Matches the syntect syntax name where available.
22    pub display_name: String,
23    /// The highlighting engine to use for this buffer.
24    pub highlighter: HighlightEngine,
25    /// The tree-sitter Language, if available (used for reference highlighting,
26    /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
27    /// support; this is `None` for the remaining 100+ syntect-only languages.
28    pub ts_language: Option<Language>,
29}
30
31impl DetectedLanguage {
32    /// Detect language from a file path using user configuration.
33    ///
34    /// This is the primary detection path used when opening, reloading, or saving files.
35    /// Priority order matches the grammar registry:
36    /// 1. Exact filename match in user config
37    /// 2. Glob pattern match in user config
38    /// 3. Extension match in user config
39    /// 4. Built-in detection (tree-sitter `Language::from_path` + syntect)
40    /// 5. Fallback config (if set and no other match found)
41    pub fn from_path(
42        path: &Path,
43        registry: &GrammarRegistry,
44        languages: &HashMap<String, LanguageConfig>,
45    ) -> Self {
46        Self::from_path_with_fallback(path, registry, languages, None)
47    }
48
49    /// Like `from_path`, but also accepts an optional default language name
50    /// that is applied when no language is detected (#1219).
51    /// The `default_language` must reference a key in the `languages` map.
52    pub fn from_path_with_fallback(
53        path: &Path,
54        registry: &GrammarRegistry,
55        languages: &HashMap<String, LanguageConfig>,
56        default_language: Option<&str>,
57    ) -> Self {
58        let highlighter = HighlightEngine::for_file(path, registry, Some(languages));
59        let ts_language = Language::from_path(path);
60        // Prefer config-based language name (e.g., "csharp") so it matches
61        // the LSP config key. Fall back to tree-sitter name (e.g., "c_sharp")
62        // or "text" if neither is available.
63        let name =
64            crate::services::lsp::manager::detect_language(path, languages).unwrap_or_else(|| {
65                ts_language
66                    .as_ref()
67                    .map(|l| l.to_string())
68                    .unwrap_or_else(|| "text".to_string())
69            });
70        // Resolve display name from the syntax matched for this file.
71        let display_name = registry
72            .find_syntax_for_file_with_languages(path, languages)
73            .map(|s| s.name.clone())
74            .unwrap_or_else(|| name.clone());
75
76        // If no language was detected and a default_language is configured,
77        // look up its grammar for highlighting (#1219)
78        if name == "text" && matches!(highlighter, HighlightEngine::None) {
79            if let Some(lang_key) = default_language {
80                let grammar = languages
81                    .get(lang_key)
82                    .map(|lc| lc.grammar.as_str())
83                    .filter(|g| !g.is_empty())
84                    .unwrap_or(lang_key);
85                let fb_highlighter =
86                    HighlightEngine::for_syntax_name(grammar, registry, ts_language);
87                if !matches!(fb_highlighter, HighlightEngine::None) {
88                    let fb_display = registry
89                        .find_syntax_by_name(grammar)
90                        .map(|s| s.name.clone())
91                        .unwrap_or_else(|| grammar.to_string());
92                    return Self {
93                        name,
94                        display_name: fb_display,
95                        highlighter: fb_highlighter,
96                        ts_language,
97                    };
98                }
99            }
100        }
101
102        Self {
103            name,
104            display_name,
105            highlighter,
106            ts_language,
107        }
108    }
109
110    /// Detect language from a file path using only built-in rules (no user config).
111    ///
112    /// Used by `from_file()` (the legacy constructor) and for virtual buffer names
113    /// where user config doesn't apply.
114    pub fn from_path_builtin(path: &Path, registry: &GrammarRegistry) -> Self {
115        let highlighter = HighlightEngine::for_file(path, registry, None);
116        let ts_language = Language::from_path(path);
117        let name = ts_language
118            .as_ref()
119            .map(|l| l.to_string())
120            .unwrap_or_else(|| "text".to_string());
121        let display_name = registry
122            .find_syntax_for_file(path)
123            .map(|s| s.name.clone())
124            .unwrap_or_else(|| name.clone());
125        Self {
126            name,
127            display_name,
128            highlighter,
129            ts_language,
130        }
131    }
132
133    /// Set language by syntax name (user selected from the language palette).
134    ///
135    /// Looks up the syntax in the grammar registry and optionally finds a
136    /// tree-sitter language for enhanced features. The `languages` config is used
137    /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
138    /// Returns `None` if the syntax name is not found in the registry.
139    pub fn from_syntax_name(
140        name: &str,
141        registry: &GrammarRegistry,
142        languages: &HashMap<String, LanguageConfig>,
143    ) -> Option<Self> {
144        if registry.find_syntax_by_name(name).is_some() {
145            let ts_language = Language::from_name(name);
146            let highlighter = HighlightEngine::for_syntax_name(name, registry, ts_language);
147            // Resolve the canonical language ID from config (e.g., "Rust" → "rust").
148            let language_id =
149                resolve_language_id(name, registry, languages).unwrap_or_else(|| name.to_string());
150            Some(Self {
151                name: language_id,
152                display_name: name.to_string(),
153                highlighter,
154                ts_language,
155            })
156        } else {
157            None
158        }
159    }
160
161    /// Create a DetectedLanguage for a user-configured language that has no
162    /// matching syntect grammar. No syntax highlighting, but the language ID
163    /// is set correctly for config/LSP purposes.
164    pub fn from_config_language(lang_id: &str) -> Self {
165        Self {
166            name: lang_id.to_string(),
167            display_name: lang_id.to_string(),
168            highlighter: HighlightEngine::None,
169            ts_language: None,
170        }
171    }
172
173    /// Plain text — no highlighting.
174    pub fn plain_text() -> Self {
175        Self {
176            name: "text".to_string(),
177            display_name: "Text".to_string(),
178            highlighter: HighlightEngine::None,
179            ts_language: None,
180        }
181    }
182
183    /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
184    ///
185    /// Strips surrounding `*` characters and extracts the filename after any
186    /// prefix like "OLD:" or "NEW:".
187    pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
188        let cleaned = name.trim_matches('*');
189        let filename = if let Some(pos) = cleaned.rfind(':') {
190            &cleaned[pos + 1..]
191        } else {
192            cleaned
193        };
194        Self::from_path_builtin(Path::new(filename), registry)
195    }
196}
197
198/// Resolve a syntect syntax display name to its canonical config language ID.
199///
200/// The config `[languages]` section is the single authoritative registry of
201/// language IDs. Each entry has a `grammar` field that is resolved to a
202/// syntect syntax via the grammar registry. This function performs the reverse
203/// lookup: for each config entry, resolve its grammar through the registry
204/// and check whether the resulting syntax matches.
205pub fn resolve_language_id(
206    syntax_name: &str,
207    registry: &GrammarRegistry,
208    languages: &HashMap<String, LanguageConfig>,
209) -> Option<String> {
210    for (lang_id, lang_config) in languages {
211        // Use find_syntax_for_lang_config which also tries extension fallback,
212        // needed when the grammar name doesn't match syntect's name
213        // (e.g., grammar "c_sharp" → syntect syntax "C#").
214        if let Some(syntax) = registry.find_syntax_for_lang_config(lang_config) {
215            if syntax.name == syntax_name {
216                return Some(lang_id.clone());
217            }
218        }
219    }
220    None
221}