fresh/primitives/detected_language.rs
1//! Unified language detection for editor buffers.
2//!
3//! This module provides `DetectedLanguage`, the single source of truth for
4//! determining a buffer's language, syntax highlighter, and tree-sitter support.
5//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::highlight_engine::HighlightEngine;
9use crate::primitives::highlighter::Language;
10use crate::primitives::GrammarRegistry;
11use std::collections::HashMap;
12use std::path::Path;
13
14/// The result of language detection — groups the things that must stay in sync
15/// on an `EditorState`: the language ID, display name, highlighting engine, and
16/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
17pub struct DetectedLanguage {
18 /// The canonical language ID for LSP and config lookup (e.g., "csharp", "rust", "text").
19 pub name: String,
20 /// Human-readable display name shown in the status bar and Set Language prompt
21 /// (e.g., "C#", "Rust", "Plain Text"). Matches the syntect syntax name where available.
22 pub display_name: String,
23 /// The highlighting engine to use for this buffer.
24 pub highlighter: HighlightEngine,
25 /// The tree-sitter Language, if available (used for reference highlighting,
26 /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
27 /// support; this is `None` for the remaining 100+ syntect-only languages.
28 pub ts_language: Option<Language>,
29}
30
31impl DetectedLanguage {
32 /// Detect language from a file path using user configuration.
33 ///
34 /// This is the primary detection path used when opening, reloading, or saving files.
35 /// Priority order matches the grammar registry:
36 /// 1. Exact filename match in user config
37 /// 2. Glob pattern match in user config
38 /// 3. Extension match in user config
39 /// 4. Built-in detection (tree-sitter `Language::from_path` + syntect)
40 /// 5. Fallback config (if set and no other match found)
41 pub fn from_path(
42 path: &Path,
43 registry: &GrammarRegistry,
44 languages: &HashMap<String, LanguageConfig>,
45 ) -> Self {
46 Self::from_path_with_fallback(path, registry, languages, None)
47 }
48
49 /// Like `from_path`, but also accepts an optional default language name
50 /// that is applied when no language is detected (#1219).
51 /// The `default_language` must reference a key in the `languages` map.
52 pub fn from_path_with_fallback(
53 path: &Path,
54 registry: &GrammarRegistry,
55 languages: &HashMap<String, LanguageConfig>,
56 default_language: Option<&str>,
57 ) -> Self {
58 let highlighter = HighlightEngine::for_file(path, registry, Some(languages));
59 let ts_language = Language::from_path(path);
60 // Prefer config-based language name (e.g., "csharp") so it matches
61 // the LSP config key. Fall back to tree-sitter name (e.g., "c_sharp")
62 // or "text" if neither is available.
63 let name =
64 crate::services::lsp::manager::detect_language(path, languages).unwrap_or_else(|| {
65 ts_language
66 .as_ref()
67 .map(|l| l.to_string())
68 .unwrap_or_else(|| "text".to_string())
69 });
70 // Resolve display name from the syntax matched for this file.
71 let display_name = registry
72 .find_syntax_for_file_with_languages(path, languages)
73 .map(|s| s.name.clone())
74 .unwrap_or_else(|| name.clone());
75
76 // If no language was detected and a default_language is configured,
77 // look up its grammar for highlighting (#1219)
78 if name == "text" && matches!(highlighter, HighlightEngine::None) {
79 if let Some(lang_key) = default_language {
80 let grammar = languages
81 .get(lang_key)
82 .map(|lc| lc.grammar.as_str())
83 .filter(|g| !g.is_empty())
84 .unwrap_or(lang_key);
85 let fb_highlighter =
86 HighlightEngine::for_syntax_name(grammar, registry, ts_language);
87 if !matches!(fb_highlighter, HighlightEngine::None) {
88 let fb_display = registry
89 .find_syntax_by_name(grammar)
90 .map(|s| s.name.clone())
91 .unwrap_or_else(|| grammar.to_string());
92 return Self {
93 name,
94 display_name: fb_display,
95 highlighter: fb_highlighter,
96 ts_language,
97 };
98 }
99 }
100 }
101
102 Self {
103 name,
104 display_name,
105 highlighter,
106 ts_language,
107 }
108 }
109
110 /// Detect language from a file path using only built-in rules (no user config).
111 ///
112 /// Used by `from_file()` (the legacy constructor) and for virtual buffer names
113 /// where user config doesn't apply.
114 pub fn from_path_builtin(path: &Path, registry: &GrammarRegistry) -> Self {
115 let highlighter = HighlightEngine::for_file(path, registry, None);
116 let ts_language = Language::from_path(path);
117 let name = ts_language
118 .as_ref()
119 .map(|l| l.to_string())
120 .unwrap_or_else(|| "text".to_string());
121 let display_name = registry
122 .find_syntax_for_file(path)
123 .map(|s| s.name.clone())
124 .unwrap_or_else(|| name.clone());
125 Self {
126 name,
127 display_name,
128 highlighter,
129 ts_language,
130 }
131 }
132
133 /// Set language by syntax name (user selected from the language palette).
134 ///
135 /// Looks up the syntax in the grammar registry and optionally finds a
136 /// tree-sitter language for enhanced features. The `languages` config is used
137 /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
138 /// Returns `None` if the syntax name is not found in the registry.
139 pub fn from_syntax_name(
140 name: &str,
141 registry: &GrammarRegistry,
142 languages: &HashMap<String, LanguageConfig>,
143 ) -> Option<Self> {
144 if registry.find_syntax_by_name(name).is_some() {
145 let ts_language = Language::from_name(name);
146 let highlighter = HighlightEngine::for_syntax_name(name, registry, ts_language);
147 // Resolve the canonical language ID from config (e.g., "Rust" → "rust").
148 let language_id =
149 resolve_language_id(name, registry, languages).unwrap_or_else(|| name.to_string());
150 Some(Self {
151 name: language_id,
152 display_name: name.to_string(),
153 highlighter,
154 ts_language,
155 })
156 } else {
157 None
158 }
159 }
160
161 /// Create a DetectedLanguage for a user-configured language that has no
162 /// matching syntect grammar. No syntax highlighting, but the language ID
163 /// is set correctly for config/LSP purposes.
164 pub fn from_config_language(lang_id: &str) -> Self {
165 Self {
166 name: lang_id.to_string(),
167 display_name: lang_id.to_string(),
168 highlighter: HighlightEngine::None,
169 ts_language: None,
170 }
171 }
172
173 /// Plain text — no highlighting.
174 pub fn plain_text() -> Self {
175 Self {
176 name: "text".to_string(),
177 display_name: "Text".to_string(),
178 highlighter: HighlightEngine::None,
179 ts_language: None,
180 }
181 }
182
183 /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
184 ///
185 /// Strips surrounding `*` characters and extracts the filename after any
186 /// prefix like "OLD:" or "NEW:".
187 pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
188 let cleaned = name.trim_matches('*');
189 let filename = if let Some(pos) = cleaned.rfind(':') {
190 &cleaned[pos + 1..]
191 } else {
192 cleaned
193 };
194 Self::from_path_builtin(Path::new(filename), registry)
195 }
196}
197
198/// Resolve a syntect syntax display name to its canonical config language ID.
199///
200/// The config `[languages]` section is the single authoritative registry of
201/// language IDs. Each entry has a `grammar` field that is resolved to a
202/// syntect syntax via the grammar registry. This function performs the reverse
203/// lookup: for each config entry, resolve its grammar through the registry
204/// and check whether the resulting syntax matches.
205pub fn resolve_language_id(
206 syntax_name: &str,
207 registry: &GrammarRegistry,
208 languages: &HashMap<String, LanguageConfig>,
209) -> Option<String> {
210 for (lang_id, lang_config) in languages {
211 // Use find_syntax_for_lang_config which also tries extension fallback,
212 // needed when the grammar name doesn't match syntect's name
213 // (e.g., grammar "c_sharp" → syntect syntax "C#").
214 if let Some(syntax) = registry.find_syntax_for_lang_config(lang_config) {
215 if syntax.name == syntax_name {
216 return Some(lang_id.clone());
217 }
218 }
219 }
220 None
221}