Skip to main content

fresh/primitives/
detected_language.rs

1//! Unified language detection for editor buffers.
2//!
3//! This module provides `DetectedLanguage`, the single source of truth for
4//! determining a buffer's language, syntax highlighter, and tree-sitter support.
5//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::highlight_engine::HighlightEngine;
9use crate::primitives::highlighter::Language;
10use crate::primitives::GrammarRegistry;
11use std::collections::HashMap;
12use std::path::Path;
13
14/// The result of language detection — groups the things that must stay in sync
15/// on an `EditorState`: the language ID, display name, highlighting engine, and
16/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
17pub struct DetectedLanguage {
18    /// The canonical language ID for LSP and config lookup (e.g., "csharp", "rust", "text").
19    pub name: String,
20    /// Human-readable display name shown in the status bar and Set Language prompt
21    /// (e.g., "C#", "Rust", "Plain Text"). Matches the syntect syntax name where available.
22    pub display_name: String,
23    /// The highlighting engine to use for this buffer.
24    pub highlighter: HighlightEngine,
25    /// The tree-sitter Language, if available (used for reference highlighting,
26    /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
27    /// support; this is `None` for the remaining 100+ syntect-only languages.
28    pub ts_language: Option<Language>,
29}
30
31impl DetectedLanguage {
32    /// Detect language from a file path using user configuration.
33    ///
34    /// This is the primary detection path used when opening, reloading, or saving files.
35    /// Priority order matches the grammar registry:
36    /// 1. Exact filename match in user config
37    /// 2. Glob pattern match in user config
38    /// 3. Extension match in user config
39    /// 4. Built-in detection (tree-sitter `Language::from_path` + syntect)
40    /// 5. Fallback config (if set and no other match found)
41    pub fn from_path(
42        path: &Path,
43        registry: &GrammarRegistry,
44        languages: &HashMap<String, LanguageConfig>,
45    ) -> Self {
46        Self::from_path_with_fallback(path, registry, languages, None)
47    }
48
49    /// Like `from_path`, but also accepts an optional fallback language config
50    /// that is applied when no language is detected (#1219).
51    pub fn from_path_with_fallback(
52        path: &Path,
53        registry: &GrammarRegistry,
54        languages: &HashMap<String, LanguageConfig>,
55        fallback: Option<&LanguageConfig>,
56    ) -> Self {
57        let highlighter = HighlightEngine::for_file_with_languages(path, registry, languages);
58        let ts_language = Language::from_path(path);
59        // Prefer config-based language name (e.g., "csharp") so it matches
60        // the LSP config key. Fall back to tree-sitter name (e.g., "c_sharp")
61        // or "text" if neither is available.
62        let name =
63            crate::services::lsp::manager::detect_language(path, languages).unwrap_or_else(|| {
64                ts_language
65                    .as_ref()
66                    .map(|l| l.to_string())
67                    .unwrap_or_else(|| "text".to_string())
68            });
69        // Resolve display name from the syntax matched for this file.
70        let display_name = registry
71            .find_syntax_for_file_with_languages(path, languages)
72            .map(|s| s.name.clone())
73            .unwrap_or_else(|| name.clone());
74
75        // If no language was detected and a fallback config is set with a grammar,
76        // try to use the fallback grammar for highlighting (#1219)
77        if name == "text" && matches!(highlighter, HighlightEngine::None) {
78            if let Some(fb) = fallback {
79                if !fb.grammar.is_empty() {
80                    let fb_highlighter =
81                        HighlightEngine::for_syntax_name(&fb.grammar, registry, ts_language);
82                    if !matches!(fb_highlighter, HighlightEngine::None) {
83                        let fb_display = registry
84                            .find_syntax_by_name(&fb.grammar)
85                            .map(|s| s.name.clone())
86                            .unwrap_or_else(|| fb.grammar.clone());
87                        return Self {
88                            name,
89                            display_name: fb_display,
90                            highlighter: fb_highlighter,
91                            ts_language,
92                        };
93                    }
94                }
95            }
96        }
97
98        Self {
99            name,
100            display_name,
101            highlighter,
102            ts_language,
103        }
104    }
105
106    /// Detect language from a file path using only built-in rules (no user config).
107    ///
108    /// Used by `from_file()` (the legacy constructor) and for virtual buffer names
109    /// where user config doesn't apply.
110    pub fn from_path_builtin(path: &Path, registry: &GrammarRegistry) -> Self {
111        let highlighter = HighlightEngine::for_file(path, registry);
112        let ts_language = Language::from_path(path);
113        let name = ts_language
114            .as_ref()
115            .map(|l| l.to_string())
116            .unwrap_or_else(|| "text".to_string());
117        let display_name = registry
118            .find_syntax_for_file(path)
119            .map(|s| s.name.clone())
120            .unwrap_or_else(|| name.clone());
121        Self {
122            name,
123            display_name,
124            highlighter,
125            ts_language,
126        }
127    }
128
129    /// Set language by syntax name (user selected from the language palette).
130    ///
131    /// Looks up the syntax in the grammar registry and optionally finds a
132    /// tree-sitter language for enhanced features. The `languages` config is used
133    /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
134    /// Returns `None` if the syntax name is not found in the registry.
135    pub fn from_syntax_name(
136        name: &str,
137        registry: &GrammarRegistry,
138        languages: &HashMap<String, LanguageConfig>,
139    ) -> Option<Self> {
140        if registry.find_syntax_by_name(name).is_some() {
141            let ts_language = Language::from_name(name);
142            let highlighter = HighlightEngine::for_syntax_name(name, registry, ts_language);
143            // Resolve the canonical language ID from config (e.g., "Rust" → "rust").
144            let language_id =
145                resolve_language_id(name, registry, languages).unwrap_or_else(|| name.to_string());
146            Some(Self {
147                name: language_id,
148                display_name: name.to_string(),
149                highlighter,
150                ts_language,
151            })
152        } else {
153            None
154        }
155    }
156
157    /// Create a DetectedLanguage for a user-configured language that has no
158    /// matching syntect grammar. No syntax highlighting, but the language ID
159    /// is set correctly for config/LSP purposes.
160    pub fn from_config_language(lang_id: &str) -> Self {
161        Self {
162            name: lang_id.to_string(),
163            display_name: lang_id.to_string(),
164            highlighter: HighlightEngine::None,
165            ts_language: None,
166        }
167    }
168
169    /// Plain text — no highlighting.
170    pub fn plain_text() -> Self {
171        Self {
172            name: "text".to_string(),
173            display_name: "Text".to_string(),
174            highlighter: HighlightEngine::None,
175            ts_language: None,
176        }
177    }
178
179    /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
180    ///
181    /// Strips surrounding `*` characters and extracts the filename after any
182    /// prefix like "OLD:" or "NEW:".
183    pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
184        let cleaned = name.trim_matches('*');
185        let filename = if let Some(pos) = cleaned.rfind(':') {
186            &cleaned[pos + 1..]
187        } else {
188            cleaned
189        };
190        Self::from_path_builtin(Path::new(filename), registry)
191    }
192}
193
194/// Resolve a syntect syntax display name to its canonical config language ID.
195///
196/// The config `[languages]` section is the single authoritative registry of
197/// language IDs. Each entry has a `grammar` field that is resolved to a
198/// syntect syntax via the grammar registry. This function performs the reverse
199/// lookup: for each config entry, resolve its grammar through the registry
200/// and check whether the resulting syntax matches.
201pub fn resolve_language_id(
202    syntax_name: &str,
203    registry: &GrammarRegistry,
204    languages: &HashMap<String, LanguageConfig>,
205) -> Option<String> {
206    for (lang_id, lang_config) in languages {
207        // Use find_syntax_for_lang_config which also tries extension fallback,
208        // needed when the grammar name doesn't match syntect's name
209        // (e.g., grammar "c_sharp" → syntect syntax "C#").
210        if let Some(syntax) = registry.find_syntax_for_lang_config(lang_config) {
211            if syntax.name == syntax_name {
212                return Some(lang_id.clone());
213            }
214        }
215    }
216    None
217}