Skip to main content

fresh/primitives/
detected_language.rs

//! Unified language detection for editor buffers.
//!
//! This module provides `DetectedLanguage`, the single source of truth for
//! determining a buffer's language, syntax highlighter, and tree-sitter support.
//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::grammar::GrammarEntry;
9use crate::primitives::highlight_engine::HighlightEngine;
10use crate::primitives::highlighter::Language;
11use crate::primitives::GrammarRegistry;
12use std::collections::HashMap;
13use std::path::Path;
14
15/// The result of language detection — groups the things that must stay in sync
16/// on an `EditorState`: the language ID, display name, highlighting engine, and
17/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
18pub struct DetectedLanguage {
19    /// The canonical language ID for LSP and config lookup (e.g., "csharp", "rust", "text").
20    pub name: String,
21    /// Human-readable display name shown in the status bar and Set Language prompt
22    /// (e.g., "C#", "Rust", "Plain Text"). Matches the syntect syntax name where available.
23    pub display_name: String,
24    /// The highlighting engine to use for this buffer.
25    pub highlighter: HighlightEngine,
26    /// The tree-sitter Language, if available (used for reference highlighting,
27    /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
28    /// support; this is `None` for the remaining 100+ syntect-only languages.
29    pub ts_language: Option<Language>,
30}
31
32impl DetectedLanguage {
33    /// Build a `DetectedLanguage` from a unified catalog entry.
34    ///
35    /// The single place that glues a `GrammarEntry` to a `HighlightEngine`.
36    /// All path-based and name-based constructors funnel through this.
37    pub fn from_entry(entry: &GrammarEntry, registry: &GrammarRegistry) -> Self {
38        Self {
39            name: entry.language_id.clone(),
40            display_name: entry.display_name.clone(),
41            highlighter: HighlightEngine::from_entry(entry, registry),
42            ts_language: entry.engines.tree_sitter,
43        }
44    }
45
46    /// Detect language from a file path using user configuration.
47    ///
48    /// This is the primary detection path used when opening, reloading, or saving files.
49    /// Priority order matches the grammar registry:
50    /// 1. Exact filename match in user config
51    /// 2. Glob pattern match in user config
52    /// 3. Extension match in user config
53    /// 4. Built-in detection (catalog lookup)
54    /// 5. Shebang / first-line regex against `first_line` (catalog lookup)
55    /// 6. Fallback config (if set and no other match found)
56    ///
57    /// `first_line` is the literal first line of the file (including any
58    /// trailing newline). The caller — which has already loaded the buffer
59    /// via the `FileSystem` trait — supplies it so the registry never does
60    /// its own I/O. Pass `None` when there is no content to inspect (e.g.,
61    /// virtual buffers, unsaved files).
62    pub fn from_path(
63        path: &Path,
64        first_line: Option<&str>,
65        registry: &GrammarRegistry,
66        languages: &HashMap<String, LanguageConfig>,
67    ) -> Self {
68        Self::from_path_with_fallback(path, first_line, registry, languages, None)
69    }
70
71    /// Like `from_path`, but also accepts an optional default language name
72    /// that is applied when no language is detected (#1219).
73    /// The `default_language` must reference a key in the `languages` map.
74    pub fn from_path_with_fallback(
75        path: &Path,
76        first_line: Option<&str>,
77        registry: &GrammarRegistry,
78        languages: &HashMap<String, LanguageConfig>,
79        default_language: Option<&str>,
80    ) -> Self {
81        // Resolve the config/LSP language id *independently* of the grammar
82        // catalog. A file matching a `[languages.foo]` rule must end up with
83        // `name = "foo"` so comment prefix / tab config / LSP routing all
84        // work — even when the grammar registry is empty (common in tests)
85        // or has no matching entry.
86        let config_lang_id = crate::services::lsp::manager::detect_language(path, languages);
87        let override_name = |mut d: Self| -> Self {
88            if let Some(id) = config_lang_id.clone() {
89                d.name = id;
90            }
91            d
92        };
93
94        if let Some(entry) = registry.find_by_path(path, first_line) {
95            return override_name(Self::from_entry(entry, registry));
96        }
97
98        // No grammar match — try the user-configured default language for
99        // highlighting, and fall back to plain text. Either way, keep any
100        // config-derived language id.
101        if let Some(lang_key) = default_language {
102            let grammar = languages
103                .get(lang_key)
104                .map(|lc| lc.grammar.as_str())
105                .filter(|g| !g.is_empty())
106                .unwrap_or(lang_key);
107            if let Some(entry) = registry.find_by_name(grammar) {
108                return override_name(Self::from_entry(entry, registry));
109            }
110        }
111
112        override_name(Self::plain_text())
113    }
114
115    /// Set language by syntax name (user selected from the language palette).
116    ///
117    /// Looks up the entry in the unified catalog. The `languages` config is used
118    /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
119    /// Returns `None` if the name matches no catalog entry.
120    pub fn from_syntax_name(
121        name: &str,
122        registry: &GrammarRegistry,
123        languages: &HashMap<String, LanguageConfig>,
124    ) -> Option<Self> {
125        let entry = registry.find_by_name(name)?;
126        let mut detected = Self::from_entry(entry, registry);
127        // Prefer a matching config language ID so LSP lookup works when the
128        // user has declared the language under a different key. `display_name`
129        // keeps the catalog's canonical value ("Bourne Again Shell (bash)"),
130        // not whatever casing the caller typed ("BASH").
131        if let Some(id) = resolve_language_id(&entry.display_name, registry, languages) {
132            detected.name = id;
133        }
134        Some(detected)
135    }
136
137    /// Plain text — no highlighting.
138    pub fn plain_text() -> Self {
139        Self {
140            name: "text".to_string(),
141            display_name: "Text".to_string(),
142            highlighter: HighlightEngine::None,
143            ts_language: None,
144        }
145    }
146
147    /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
148    ///
149    /// Strips surrounding `*` characters and extracts the filename after any
150    /// prefix like "OLD:" or "NEW:".
151    pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
152        let cleaned = name.trim_matches('*');
153        let filename = if let Some(pos) = cleaned.rfind(':') {
154            &cleaned[pos + 1..]
155        } else {
156            cleaned
157        };
158        registry
159            .find_by_path(Path::new(filename), None)
160            .map(|entry| Self::from_entry(entry, registry))
161            .unwrap_or_else(Self::plain_text)
162    }
163}
164
165/// Resolve a syntect syntax display name to its canonical config language ID.
166///
167/// The config `[languages]` section is the single authoritative registry of
168/// language IDs. Each entry has a `grammar` field that is resolved to a
169/// catalog entry; this function performs the reverse lookup.
170pub fn resolve_language_id(
171    syntax_name: &str,
172    registry: &GrammarRegistry,
173    languages: &HashMap<String, LanguageConfig>,
174) -> Option<String> {
175    for (lang_id, lang_config) in languages {
176        if let Some(entry) = registry.find_by_name(&lang_config.grammar) {
177            if entry.display_name == syntax_name {
178                return Some(lang_id.clone());
179            }
180        }
181    }
182    None
183}