fresh/primitives/detected_language.rs
1//! Unified language detection for editor buffers.
2//!
3//! This module provides `DetectedLanguage`, the single source of truth for
4//! determining a buffer's language, syntax highlighter, and tree-sitter support.
5//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::highlight_engine::HighlightEngine;
9use crate::primitives::highlighter::Language;
10use crate::primitives::GrammarRegistry;
11use std::collections::HashMap;
12use std::path::Path;
13
14/// The result of language detection — groups the things that must stay in sync
15/// on an `EditorState`: the language ID, display name, highlighting engine, and
16/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
17pub struct DetectedLanguage {
18 /// The canonical language ID for LSP and config lookup (e.g., "csharp", "rust", "text").
19 pub name: String,
20 /// Human-readable display name shown in the status bar and Set Language prompt
21 /// (e.g., "C#", "Rust", "Plain Text"). Matches the syntect syntax name where available.
22 pub display_name: String,
23 /// The highlighting engine to use for this buffer.
24 pub highlighter: HighlightEngine,
25 /// The tree-sitter Language, if available (used for reference highlighting,
26 /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
27 /// support; this is `None` for the remaining 100+ syntect-only languages.
28 pub ts_language: Option<Language>,
29}
30
31impl DetectedLanguage {
32 /// Detect language from a file path using user configuration.
33 ///
34 /// This is the primary detection path used when opening, reloading, or saving files.
35 /// Priority order matches the grammar registry:
36 /// 1. Exact filename match in user config
37 /// 2. Glob pattern match in user config
38 /// 3. Extension match in user config
39 /// 4. Built-in detection (tree-sitter `Language::from_path` + syntect)
40 /// 5. Fallback config (if set and no other match found)
41 pub fn from_path(
42 path: &Path,
43 registry: &GrammarRegistry,
44 languages: &HashMap<String, LanguageConfig>,
45 ) -> Self {
46 Self::from_path_with_fallback(path, registry, languages, None)
47 }
48
49 /// Like `from_path`, but also accepts an optional fallback language config
50 /// that is applied when no language is detected (#1219).
51 pub fn from_path_with_fallback(
52 path: &Path,
53 registry: &GrammarRegistry,
54 languages: &HashMap<String, LanguageConfig>,
55 fallback: Option<&LanguageConfig>,
56 ) -> Self {
57 let highlighter = HighlightEngine::for_file_with_languages(path, registry, languages);
58 let ts_language = Language::from_path(path);
59 // Prefer config-based language name (e.g., "csharp") so it matches
60 // the LSP config key. Fall back to tree-sitter name (e.g., "c_sharp")
61 // or "text" if neither is available.
62 let name =
63 crate::services::lsp::manager::detect_language(path, languages).unwrap_or_else(|| {
64 ts_language
65 .as_ref()
66 .map(|l| l.to_string())
67 .unwrap_or_else(|| "text".to_string())
68 });
69 // Resolve display name from the syntax matched for this file.
70 let display_name = registry
71 .find_syntax_for_file_with_languages(path, languages)
72 .map(|s| s.name.clone())
73 .unwrap_or_else(|| name.clone());
74
75 // If no language was detected and a fallback config is set with a grammar,
76 // try to use the fallback grammar for highlighting (#1219)
77 if name == "text" && matches!(highlighter, HighlightEngine::None) {
78 if let Some(fb) = fallback {
79 if !fb.grammar.is_empty() {
80 let fb_highlighter =
81 HighlightEngine::for_syntax_name(&fb.grammar, registry, ts_language);
82 if !matches!(fb_highlighter, HighlightEngine::None) {
83 let fb_display = registry
84 .find_syntax_by_name(&fb.grammar)
85 .map(|s| s.name.clone())
86 .unwrap_or_else(|| fb.grammar.clone());
87 return Self {
88 name,
89 display_name: fb_display,
90 highlighter: fb_highlighter,
91 ts_language,
92 };
93 }
94 }
95 }
96 }
97
98 Self {
99 name,
100 display_name,
101 highlighter,
102 ts_language,
103 }
104 }
105
106 /// Detect language from a file path using only built-in rules (no user config).
107 ///
108 /// Used by `from_file()` (the legacy constructor) and for virtual buffer names
109 /// where user config doesn't apply.
110 pub fn from_path_builtin(path: &Path, registry: &GrammarRegistry) -> Self {
111 let highlighter = HighlightEngine::for_file(path, registry);
112 let ts_language = Language::from_path(path);
113 let name = ts_language
114 .as_ref()
115 .map(|l| l.to_string())
116 .unwrap_or_else(|| "text".to_string());
117 let display_name = registry
118 .find_syntax_for_file(path)
119 .map(|s| s.name.clone())
120 .unwrap_or_else(|| name.clone());
121 Self {
122 name,
123 display_name,
124 highlighter,
125 ts_language,
126 }
127 }
128
129 /// Set language by syntax name (user selected from the language palette).
130 ///
131 /// Looks up the syntax in the grammar registry and optionally finds a
132 /// tree-sitter language for enhanced features. The `languages` config is used
133 /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
134 /// Returns `None` if the syntax name is not found in the registry.
135 pub fn from_syntax_name(
136 name: &str,
137 registry: &GrammarRegistry,
138 languages: &HashMap<String, LanguageConfig>,
139 ) -> Option<Self> {
140 if registry.find_syntax_by_name(name).is_some() {
141 let ts_language = Language::from_name(name);
142 let highlighter = HighlightEngine::for_syntax_name(name, registry, ts_language);
143 // Resolve the canonical language ID from config (e.g., "Rust" → "rust").
144 let language_id =
145 resolve_language_id(name, registry, languages).unwrap_or_else(|| name.to_string());
146 Some(Self {
147 name: language_id,
148 display_name: name.to_string(),
149 highlighter,
150 ts_language,
151 })
152 } else {
153 None
154 }
155 }
156
157 /// Create a DetectedLanguage for a user-configured language that has no
158 /// matching syntect grammar. No syntax highlighting, but the language ID
159 /// is set correctly for config/LSP purposes.
160 pub fn from_config_language(lang_id: &str) -> Self {
161 Self {
162 name: lang_id.to_string(),
163 display_name: lang_id.to_string(),
164 highlighter: HighlightEngine::None,
165 ts_language: None,
166 }
167 }
168
169 /// Plain text — no highlighting.
170 pub fn plain_text() -> Self {
171 Self {
172 name: "text".to_string(),
173 display_name: "Text".to_string(),
174 highlighter: HighlightEngine::None,
175 ts_language: None,
176 }
177 }
178
179 /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
180 ///
181 /// Strips surrounding `*` characters and extracts the filename after any
182 /// prefix like "OLD:" or "NEW:".
183 pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
184 let cleaned = name.trim_matches('*');
185 let filename = if let Some(pos) = cleaned.rfind(':') {
186 &cleaned[pos + 1..]
187 } else {
188 cleaned
189 };
190 Self::from_path_builtin(Path::new(filename), registry)
191 }
192}
193
194/// Resolve a syntect syntax display name to its canonical config language ID.
195///
196/// The config `[languages]` section is the single authoritative registry of
197/// language IDs. Each entry has a `grammar` field that is resolved to a
198/// syntect syntax via the grammar registry. This function performs the reverse
199/// lookup: for each config entry, resolve its grammar through the registry
200/// and check whether the resulting syntax matches.
201pub fn resolve_language_id(
202 syntax_name: &str,
203 registry: &GrammarRegistry,
204 languages: &HashMap<String, LanguageConfig>,
205) -> Option<String> {
206 for (lang_id, lang_config) in languages {
207 // Use find_syntax_for_lang_config which also tries extension fallback,
208 // needed when the grammar name doesn't match syntect's name
209 // (e.g., grammar "c_sharp" → syntect syntax "C#").
210 if let Some(syntax) = registry.find_syntax_for_lang_config(lang_config) {
211 if syntax.name == syntax_name {
212 return Some(lang_id.clone());
213 }
214 }
215 }
216 None
217}