fresh/primitives/detected_language.rs
1//! Unified language detection for editor buffers.
2//!
3//! This module provides `DetectedLanguage`, the single source of truth for
4//! determining a buffer's language, syntax highlighter, and tree-sitter support.
5//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::grammar::GrammarEntry;
9use crate::primitives::highlight_engine::HighlightEngine;
10use crate::primitives::highlighter::Language;
11use crate::primitives::GrammarRegistry;
12use std::collections::HashMap;
13use std::path::Path;
14
15/// The result of language detection — groups the things that must stay in sync
16/// on an `EditorState`: the language ID, display name, highlighting engine, and
17/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
18pub struct DetectedLanguage {
19 /// The canonical language ID for LSP and config lookup (e.g., "csharp", "rust", "text").
20 pub name: String,
21 /// Human-readable display name shown in the status bar and Set Language prompt
22 /// (e.g., "C#", "Rust", "Plain Text"). Matches the syntect syntax name where available.
23 pub display_name: String,
24 /// The highlighting engine to use for this buffer.
25 pub highlighter: HighlightEngine,
26 /// The tree-sitter Language, if available (used for reference highlighting,
27 /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
28 /// support; this is `None` for the remaining 100+ syntect-only languages.
29 pub ts_language: Option<Language>,
30}
31
32impl DetectedLanguage {
33 /// Build a `DetectedLanguage` from a unified catalog entry.
34 ///
35 /// The single place that glues a `GrammarEntry` to a `HighlightEngine`.
36 /// All path-based and name-based constructors funnel through this.
37 pub fn from_entry(entry: &GrammarEntry, registry: &GrammarRegistry) -> Self {
38 Self {
39 name: entry.language_id.clone(),
40 display_name: entry.display_name.clone(),
41 highlighter: HighlightEngine::from_entry(entry, registry),
42 ts_language: entry.engines.tree_sitter,
43 }
44 }
45
46 /// Detect language from a file path using user configuration.
47 ///
48 /// This is the primary detection path used when opening, reloading, or saving files.
49 /// Priority order matches the grammar registry:
50 /// 1. Exact filename match in user config
51 /// 2. Glob pattern match in user config
52 /// 3. Extension match in user config
53 /// 4. Built-in detection (catalog lookup)
54 /// 5. Shebang / first-line regex against `first_line` (catalog lookup)
55 /// 6. Fallback config (if set and no other match found)
56 ///
57 /// `first_line` is the literal first line of the file (including any
58 /// trailing newline). The caller — which has already loaded the buffer
59 /// via the `FileSystem` trait — supplies it so the registry never does
60 /// its own I/O. Pass `None` when there is no content to inspect (e.g.,
61 /// virtual buffers, unsaved files).
62 pub fn from_path(
63 path: &Path,
64 first_line: Option<&str>,
65 registry: &GrammarRegistry,
66 languages: &HashMap<String, LanguageConfig>,
67 ) -> Self {
68 Self::from_path_with_fallback(path, first_line, registry, languages, None)
69 }
70
71 /// Like `from_path`, but also accepts an optional default language name
72 /// that is applied when no language is detected (#1219).
73 /// The `default_language` must reference a key in the `languages` map.
74 pub fn from_path_with_fallback(
75 path: &Path,
76 first_line: Option<&str>,
77 registry: &GrammarRegistry,
78 languages: &HashMap<String, LanguageConfig>,
79 default_language: Option<&str>,
80 ) -> Self {
81 // Resolve the config/LSP language id *independently* of the grammar
82 // catalog. A file matching a `[languages.foo]` rule must end up with
83 // `name = "foo"` so comment prefix / tab config / LSP routing all
84 // work — even when the grammar registry is empty (common in tests)
85 // or has no matching entry.
86 let config_lang_id = crate::services::lsp::manager::detect_language(path, languages);
87 let override_name = |mut d: Self| -> Self {
88 if let Some(id) = config_lang_id.clone() {
89 d.name = id;
90 }
91 d
92 };
93
94 if let Some(entry) = registry.find_by_path(path, first_line) {
95 return override_name(Self::from_entry(entry, registry));
96 }
97
98 // No grammar match — try the user-configured default language for
99 // highlighting, and fall back to plain text. Either way, keep any
100 // config-derived language id.
101 if let Some(lang_key) = default_language {
102 let grammar = languages
103 .get(lang_key)
104 .map(|lc| lc.grammar.as_str())
105 .filter(|g| !g.is_empty())
106 .unwrap_or(lang_key);
107 if let Some(entry) = registry.find_by_name(grammar) {
108 return override_name(Self::from_entry(entry, registry));
109 }
110 }
111
112 override_name(Self::plain_text())
113 }
114
115 /// Set language by syntax name (user selected from the language palette).
116 ///
117 /// Looks up the entry in the unified catalog. The `languages` config is used
118 /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
119 /// Returns `None` if the name matches no catalog entry.
120 pub fn from_syntax_name(
121 name: &str,
122 registry: &GrammarRegistry,
123 languages: &HashMap<String, LanguageConfig>,
124 ) -> Option<Self> {
125 let entry = registry.find_by_name(name)?;
126 let mut detected = Self::from_entry(entry, registry);
127 // Prefer a matching config language ID so LSP lookup works when the
128 // user has declared the language under a different key. `display_name`
129 // keeps the catalog's canonical value ("Bourne Again Shell (bash)"),
130 // not whatever casing the caller typed ("BASH").
131 if let Some(id) = resolve_language_id(&entry.display_name, registry, languages) {
132 detected.name = id;
133 }
134 Some(detected)
135 }
136
137 /// Plain text — no highlighting.
138 pub fn plain_text() -> Self {
139 Self {
140 name: "text".to_string(),
141 display_name: "Text".to_string(),
142 highlighter: HighlightEngine::None,
143 ts_language: None,
144 }
145 }
146
147 /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
148 ///
149 /// Strips surrounding `*` characters and extracts the filename after any
150 /// prefix like "OLD:" or "NEW:".
151 pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
152 let cleaned = name.trim_matches('*');
153 let filename = if let Some(pos) = cleaned.rfind(':') {
154 &cleaned[pos + 1..]
155 } else {
156 cleaned
157 };
158 registry
159 .find_by_path(Path::new(filename), None)
160 .map(|entry| Self::from_entry(entry, registry))
161 .unwrap_or_else(Self::plain_text)
162 }
163}
164
165/// Resolve a syntect syntax display name to its canonical config language ID.
166///
167/// The config `[languages]` section is the single authoritative registry of
168/// language IDs. Each entry has a `grammar` field that is resolved to a
169/// catalog entry; this function performs the reverse lookup.
170pub fn resolve_language_id(
171 syntax_name: &str,
172 registry: &GrammarRegistry,
173 languages: &HashMap<String, LanguageConfig>,
174) -> Option<String> {
175 for (lang_id, lang_config) in languages {
176 if let Some(entry) = registry.find_by_name(&lang_config.grammar) {
177 if entry.display_name == syntax_name {
178 return Some(lang_id.clone());
179 }
180 }
181 }
182 None
183}