Skip to main content

fresh/primitives/
detected_language.rs

//! Unified language detection for editor buffers.
//!
//! This module provides `DetectedLanguage`, the single source of truth for
//! determining a buffer's language, syntax highlighter, and tree-sitter support.
//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::highlight_engine::HighlightEngine;
9use crate::primitives::highlighter::Language;
10use crate::primitives::GrammarRegistry;
11use std::collections::HashMap;
12use std::path::Path;
13
14/// The result of language detection — groups the three things that must stay in sync
15/// on an `EditorState`: the language name, the highlighting engine, and the
16/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
17pub struct DetectedLanguage {
18    /// The language name for LSP, status bar, and config lookup
19    /// (e.g., "Rust", "Python", "text", "Plain Text").
20    pub name: String,
21    /// The highlighting engine to use for this buffer.
22    pub highlighter: HighlightEngine,
23    /// The tree-sitter Language, if available (used for reference highlighting,
24    /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
25    /// support; this is `None` for the remaining 100+ syntect-only languages.
26    pub ts_language: Option<Language>,
27}
28
29impl DetectedLanguage {
30    /// Detect language from a file path using user configuration.
31    ///
32    /// This is the primary detection path used when opening, reloading, or saving files.
33    /// Priority order matches the grammar registry:
34    /// 1. Exact filename match in user config
35    /// 2. Glob pattern match in user config
36    /// 3. Extension match in user config
37    /// 4. Built-in detection (tree-sitter `Language::from_path` + syntect)
38    pub fn from_path(
39        path: &Path,
40        registry: &GrammarRegistry,
41        languages: &HashMap<String, LanguageConfig>,
42    ) -> Self {
43        let highlighter = HighlightEngine::for_file_with_languages(path, registry, languages);
44        let ts_language = Language::from_path(path);
45        let name = if let Some(lang) = &ts_language {
46            lang.to_string()
47        } else {
48            crate::services::lsp::manager::detect_language(path, languages)
49                .unwrap_or_else(|| "text".to_string())
50        };
51        Self {
52            name,
53            highlighter,
54            ts_language,
55        }
56    }
57
58    /// Detect language from a file path using only built-in rules (no user config).
59    ///
60    /// Used by `from_file()` (the legacy constructor) and for virtual buffer names
61    /// where user config doesn't apply.
62    pub fn from_path_builtin(path: &Path, registry: &GrammarRegistry) -> Self {
63        let highlighter = HighlightEngine::for_file(path, registry);
64        let ts_language = Language::from_path(path);
65        let name = ts_language
66            .as_ref()
67            .map(|l| l.to_string())
68            .unwrap_or_else(|| "text".to_string());
69        Self {
70            name,
71            highlighter,
72            ts_language,
73        }
74    }
75
76    /// Set language by syntax name (user selected from the language palette).
77    ///
78    /// Looks up the syntax in the grammar registry and optionally finds a
79    /// tree-sitter language for enhanced features. The `languages` config is used
80    /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
81    /// Returns `None` if the syntax name is not found in the registry.
82    pub fn from_syntax_name(
83        name: &str,
84        registry: &GrammarRegistry,
85        languages: &HashMap<String, LanguageConfig>,
86    ) -> Option<Self> {
87        if registry.find_syntax_by_name(name).is_some() {
88            let ts_language = Language::from_name(name);
89            let highlighter = HighlightEngine::for_syntax_name(name, registry, ts_language);
90            // Resolve the canonical language ID from config (e.g., "Rust" → "rust").
91            let language_id =
92                resolve_language_id(name, registry, languages).unwrap_or_else(|| name.to_string());
93            Some(Self {
94                name: language_id,
95                highlighter,
96                ts_language,
97            })
98        } else {
99            None
100        }
101    }
102
103    /// Plain text — no highlighting.
104    pub fn plain_text() -> Self {
105        Self {
106            name: "text".to_string(),
107            highlighter: HighlightEngine::None,
108            ts_language: None,
109        }
110    }
111
112    /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
113    ///
114    /// Strips surrounding `*` characters and extracts the filename after any
115    /// prefix like "OLD:" or "NEW:".
116    pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
117        let cleaned = name.trim_matches('*');
118        let filename = if let Some(pos) = cleaned.rfind(':') {
119            &cleaned[pos + 1..]
120        } else {
121            cleaned
122        };
123        Self::from_path_builtin(Path::new(filename), registry)
124    }
125}
126
127/// Resolve a syntect syntax display name to its canonical config language ID.
128///
129/// The config `[languages]` section is the single authoritative registry of
130/// language IDs. Each entry has a `grammar` field that is resolved to a
131/// syntect syntax via `GrammarRegistry::find_syntax_by_name`. This function
132/// performs the reverse lookup: for each config entry, resolve its grammar
133/// through the registry and check whether the resulting syntax matches.
134pub fn resolve_language_id(
135    syntax_name: &str,
136    registry: &GrammarRegistry,
137    languages: &HashMap<String, LanguageConfig>,
138) -> Option<String> {
139    for (lang_id, lang_config) in languages {
140        if let Some(syntax) = registry.find_syntax_by_name(&lang_config.grammar) {
141            if syntax.name == syntax_name {
142                return Some(lang_id.clone());
143            }
144        }
145    }
146    None
147}