fresh/primitives/detected_language.rs
1//! Unified language detection for editor buffers.
2//!
3//! This module provides `DetectedLanguage`, the single source of truth for
4//! determining a buffer's language, syntax highlighter, and tree-sitter support.
5//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::grammar::GrammarEntry;
9use crate::primitives::highlight_engine::HighlightEngine;
10use crate::primitives::highlighter::Language;
11use crate::primitives::GrammarRegistry;
12use std::collections::HashMap;
13use std::path::Path;
14
15/// The result of language detection — groups the things that must stay in sync
16/// on an `EditorState`: the language ID, display name, highlighting engine, and
17/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
18pub struct DetectedLanguage {
19 /// The canonical language ID for LSP and config lookup (e.g., "csharp", "rust", "text").
20 pub name: String,
21 /// Human-readable display name shown in the status bar and Set Language prompt
22 /// (e.g., "C#", "Rust", "Plain Text"). Matches the syntect syntax name where available.
23 pub display_name: String,
24 /// The highlighting engine to use for this buffer.
25 pub highlighter: HighlightEngine,
26 /// The tree-sitter Language, if available (used for reference highlighting,
27 /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
28 /// support; this is `None` for the remaining 100+ syntect-only languages.
29 pub ts_language: Option<Language>,
30}
31
32impl DetectedLanguage {
33 /// Build a `DetectedLanguage` from a unified catalog entry.
34 ///
35 /// The single place that glues a `GrammarEntry` to a `HighlightEngine`.
36 /// All path-based and name-based constructors funnel through this.
37 pub fn from_entry(entry: &GrammarEntry, registry: &GrammarRegistry) -> Self {
38 Self {
39 name: entry.language_id.clone(),
40 display_name: entry.display_name.clone(),
41 highlighter: HighlightEngine::from_entry(entry, registry),
42 ts_language: entry.engines.tree_sitter,
43 }
44 }
45
46 /// Detect language from a file path using user configuration.
47 ///
48 /// This is the primary detection path used when opening, reloading, or saving files.
49 /// Priority order matches the grammar registry:
50 /// 1. Exact filename match in user config
51 /// 2. Glob pattern match in user config
52 /// 3. Extension match in user config
53 /// 4. Built-in detection (catalog lookup)
54 /// 5. Fallback config (if set and no other match found)
55 pub fn from_path(
56 path: &Path,
57 registry: &GrammarRegistry,
58 languages: &HashMap<String, LanguageConfig>,
59 ) -> Self {
60 Self::from_path_with_fallback(path, registry, languages, None)
61 }
62
63 /// Like `from_path`, but also accepts an optional default language name
64 /// that is applied when no language is detected (#1219).
65 /// The `default_language` must reference a key in the `languages` map.
66 pub fn from_path_with_fallback(
67 path: &Path,
68 registry: &GrammarRegistry,
69 languages: &HashMap<String, LanguageConfig>,
70 default_language: Option<&str>,
71 ) -> Self {
72 // Resolve the config/LSP language id *independently* of the grammar
73 // catalog. A file matching a `[languages.foo]` rule must end up with
74 // `name = "foo"` so comment prefix / tab config / LSP routing all
75 // work — even when the grammar registry is empty (common in tests)
76 // or has no matching entry.
77 let config_lang_id = crate::services::lsp::manager::detect_language(path, languages);
78 let override_name = |mut d: Self| -> Self {
79 if let Some(id) = config_lang_id.clone() {
80 d.name = id;
81 }
82 d
83 };
84
85 if let Some(entry) = registry.find_by_path(path) {
86 return override_name(Self::from_entry(entry, registry));
87 }
88
89 // No grammar match — try the user-configured default language for
90 // highlighting, and fall back to plain text. Either way, keep any
91 // config-derived language id.
92 if let Some(lang_key) = default_language {
93 let grammar = languages
94 .get(lang_key)
95 .map(|lc| lc.grammar.as_str())
96 .filter(|g| !g.is_empty())
97 .unwrap_or(lang_key);
98 if let Some(entry) = registry.find_by_name(grammar) {
99 return override_name(Self::from_entry(entry, registry));
100 }
101 }
102
103 override_name(Self::plain_text())
104 }
105
106 /// Set language by syntax name (user selected from the language palette).
107 ///
108 /// Looks up the entry in the unified catalog. The `languages` config is used
109 /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
110 /// Returns `None` if the name matches no catalog entry.
111 pub fn from_syntax_name(
112 name: &str,
113 registry: &GrammarRegistry,
114 languages: &HashMap<String, LanguageConfig>,
115 ) -> Option<Self> {
116 let entry = registry.find_by_name(name)?;
117 let mut detected = Self::from_entry(entry, registry);
118 // Prefer a matching config language ID so LSP lookup works when the
119 // user has declared the language under a different key. `display_name`
120 // keeps the catalog's canonical value ("Bourne Again Shell (bash)"),
121 // not whatever casing the caller typed ("BASH").
122 if let Some(id) = resolve_language_id(&entry.display_name, registry, languages) {
123 detected.name = id;
124 }
125 Some(detected)
126 }
127
128 /// Plain text — no highlighting.
129 pub fn plain_text() -> Self {
130 Self {
131 name: "text".to_string(),
132 display_name: "Text".to_string(),
133 highlighter: HighlightEngine::None,
134 ts_language: None,
135 }
136 }
137
138 /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
139 ///
140 /// Strips surrounding `*` characters and extracts the filename after any
141 /// prefix like "OLD:" or "NEW:".
142 pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
143 let cleaned = name.trim_matches('*');
144 let filename = if let Some(pos) = cleaned.rfind(':') {
145 &cleaned[pos + 1..]
146 } else {
147 cleaned
148 };
149 registry
150 .find_by_path(Path::new(filename))
151 .map(|entry| Self::from_entry(entry, registry))
152 .unwrap_or_else(Self::plain_text)
153 }
154}
155
156/// Resolve a syntect syntax display name to its canonical config language ID.
157///
158/// The config `[languages]` section is the single authoritative registry of
159/// language IDs. Each entry has a `grammar` field that is resolved to a
160/// catalog entry; this function performs the reverse lookup.
161pub fn resolve_language_id(
162 syntax_name: &str,
163 registry: &GrammarRegistry,
164 languages: &HashMap<String, LanguageConfig>,
165) -> Option<String> {
166 for (lang_id, lang_config) in languages {
167 if let Some(entry) = registry.find_by_name(&lang_config.grammar) {
168 if entry.display_name == syntax_name {
169 return Some(lang_id.clone());
170 }
171 }
172 }
173 None
174}