Skip to main content

fresh/primitives/
detected_language.rs

1//! Unified language detection for editor buffers.
2//!
3//! This module provides `DetectedLanguage`, the single source of truth for
4//! determining a buffer's language, syntax highlighter, and tree-sitter support.
5//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::grammar::GrammarEntry;
9use crate::primitives::highlight_engine::HighlightEngine;
10use crate::primitives::highlighter::Language;
11use crate::primitives::GrammarRegistry;
12use std::collections::HashMap;
13use std::path::Path;
14
15/// The result of language detection — groups the things that must stay in sync
16/// on an `EditorState`: the language ID, display name, highlighting engine, and
17/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
18pub struct DetectedLanguage {
19    /// The canonical language ID for LSP and config lookup (e.g., "csharp", "rust", "text").
20    pub name: String,
21    /// Human-readable display name shown in the status bar and Set Language prompt
22    /// (e.g., "C#", "Rust", "Plain Text"). Matches the syntect syntax name where available.
23    pub display_name: String,
24    /// The highlighting engine to use for this buffer.
25    pub highlighter: HighlightEngine,
26    /// The tree-sitter Language, if available (used for reference highlighting,
27    /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
28    /// support; this is `None` for the remaining 100+ syntect-only languages.
29    pub ts_language: Option<Language>,
30}
31
32impl DetectedLanguage {
33    /// Build a `DetectedLanguage` from a unified catalog entry.
34    ///
35    /// The single place that glues a `GrammarEntry` to a `HighlightEngine`.
36    /// All path-based and name-based constructors funnel through this.
37    pub fn from_entry(entry: &GrammarEntry, registry: &GrammarRegistry) -> Self {
38        Self {
39            name: entry.language_id.clone(),
40            display_name: entry.display_name.clone(),
41            highlighter: HighlightEngine::from_entry(entry, registry),
42            ts_language: entry.engines.tree_sitter,
43        }
44    }
45
46    /// Detect language from a file path using user configuration.
47    ///
48    /// This is the primary detection path used when opening, reloading, or saving files.
49    /// Priority order matches the grammar registry:
50    /// 1. Exact filename match in user config
51    /// 2. Glob pattern match in user config
52    /// 3. Extension match in user config
53    /// 4. Built-in detection (catalog lookup)
54    /// 5. Fallback config (if set and no other match found)
55    pub fn from_path(
56        path: &Path,
57        registry: &GrammarRegistry,
58        languages: &HashMap<String, LanguageConfig>,
59    ) -> Self {
60        Self::from_path_with_fallback(path, registry, languages, None)
61    }
62
63    /// Like `from_path`, but also accepts an optional default language name
64    /// that is applied when no language is detected (#1219).
65    /// The `default_language` must reference a key in the `languages` map.
66    pub fn from_path_with_fallback(
67        path: &Path,
68        registry: &GrammarRegistry,
69        languages: &HashMap<String, LanguageConfig>,
70        default_language: Option<&str>,
71    ) -> Self {
72        // Resolve the config/LSP language id *independently* of the grammar
73        // catalog. A file matching a `[languages.foo]` rule must end up with
74        // `name = "foo"` so comment prefix / tab config / LSP routing all
75        // work — even when the grammar registry is empty (common in tests)
76        // or has no matching entry.
77        let config_lang_id = crate::services::lsp::manager::detect_language(path, languages);
78        let override_name = |mut d: Self| -> Self {
79            if let Some(id) = config_lang_id.clone() {
80                d.name = id;
81            }
82            d
83        };
84
85        if let Some(entry) = registry.find_by_path(path) {
86            return override_name(Self::from_entry(entry, registry));
87        }
88
89        // No grammar match — try the user-configured default language for
90        // highlighting, and fall back to plain text. Either way, keep any
91        // config-derived language id.
92        if let Some(lang_key) = default_language {
93            let grammar = languages
94                .get(lang_key)
95                .map(|lc| lc.grammar.as_str())
96                .filter(|g| !g.is_empty())
97                .unwrap_or(lang_key);
98            if let Some(entry) = registry.find_by_name(grammar) {
99                return override_name(Self::from_entry(entry, registry));
100            }
101        }
102
103        override_name(Self::plain_text())
104    }
105
106    /// Set language by syntax name (user selected from the language palette).
107    ///
108    /// Looks up the entry in the unified catalog. The `languages` config is used
109    /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
110    /// Returns `None` if the name matches no catalog entry.
111    pub fn from_syntax_name(
112        name: &str,
113        registry: &GrammarRegistry,
114        languages: &HashMap<String, LanguageConfig>,
115    ) -> Option<Self> {
116        let entry = registry.find_by_name(name)?;
117        let mut detected = Self::from_entry(entry, registry);
118        // Prefer a matching config language ID so LSP lookup works when the
119        // user has declared the language under a different key. `display_name`
120        // keeps the catalog's canonical value ("Bourne Again Shell (bash)"),
121        // not whatever casing the caller typed ("BASH").
122        if let Some(id) = resolve_language_id(&entry.display_name, registry, languages) {
123            detected.name = id;
124        }
125        Some(detected)
126    }
127
128    /// Plain text — no highlighting.
129    pub fn plain_text() -> Self {
130        Self {
131            name: "text".to_string(),
132            display_name: "Text".to_string(),
133            highlighter: HighlightEngine::None,
134            ts_language: None,
135        }
136    }
137
138    /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
139    ///
140    /// Strips surrounding `*` characters and extracts the filename after any
141    /// prefix like "OLD:" or "NEW:".
142    pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
143        let cleaned = name.trim_matches('*');
144        let filename = if let Some(pos) = cleaned.rfind(':') {
145            &cleaned[pos + 1..]
146        } else {
147            cleaned
148        };
149        registry
150            .find_by_path(Path::new(filename))
151            .map(|entry| Self::from_entry(entry, registry))
152            .unwrap_or_else(Self::plain_text)
153    }
154}
155
156/// Resolve a syntect syntax display name to its canonical config language ID.
157///
158/// The config `[languages]` section is the single authoritative registry of
159/// language IDs. Each entry has a `grammar` field that is resolved to a
160/// catalog entry; this function performs the reverse lookup.
161pub fn resolve_language_id(
162    syntax_name: &str,
163    registry: &GrammarRegistry,
164    languages: &HashMap<String, LanguageConfig>,
165) -> Option<String> {
166    for (lang_id, lang_config) in languages {
167        if let Some(entry) = registry.find_by_name(&lang_config.grammar) {
168            if entry.display_name == syntax_name {
169                return Some(lang_id.clone());
170            }
171        }
172    }
173    None
174}