fresh/primitives/detected_language.rs
1//! Unified language detection for editor buffers.
2//!
3//! This module provides `DetectedLanguage`, the single source of truth for
4//! determining a buffer's language, syntax highlighter, and tree-sitter support.
5//! All code paths that set or change a buffer's language should go through this module.
6
7use crate::config::LanguageConfig;
8use crate::primitives::highlight_engine::HighlightEngine;
9use crate::primitives::highlighter::Language;
10use crate::primitives::GrammarRegistry;
11use std::collections::HashMap;
12use std::path::Path;
13
14/// The result of language detection — groups the three things that must stay in sync
15/// on an `EditorState`: the language name, the highlighting engine, and the
16/// tree-sitter `Language` (used for reference highlighting, indentation, etc.).
17pub struct DetectedLanguage {
18 /// The language name for LSP, status bar, and config lookup
19 /// (e.g., "Rust", "Python", "text", "Plain Text").
20 pub name: String,
21 /// The highlighting engine to use for this buffer.
22 pub highlighter: HighlightEngine,
23 /// The tree-sitter Language, if available (used for reference highlighting,
24 /// auto-indent, bracket matching, etc.). Only ~18 languages have tree-sitter
25 /// support; this is `None` for the remaining 100+ syntect-only languages.
26 pub ts_language: Option<Language>,
27}
28
29impl DetectedLanguage {
30 /// Detect language from a file path using user configuration.
31 ///
32 /// This is the primary detection path used when opening, reloading, or saving files.
33 /// Priority order matches the grammar registry:
34 /// 1. Exact filename match in user config
35 /// 2. Glob pattern match in user config
36 /// 3. Extension match in user config
37 /// 4. Built-in detection (tree-sitter `Language::from_path` + syntect)
38 pub fn from_path(
39 path: &Path,
40 registry: &GrammarRegistry,
41 languages: &HashMap<String, LanguageConfig>,
42 ) -> Self {
43 let highlighter = HighlightEngine::for_file_with_languages(path, registry, languages);
44 let ts_language = Language::from_path(path);
45 let name = if let Some(lang) = &ts_language {
46 lang.to_string()
47 } else {
48 crate::services::lsp::manager::detect_language(path, languages)
49 .unwrap_or_else(|| "text".to_string())
50 };
51 Self {
52 name,
53 highlighter,
54 ts_language,
55 }
56 }
57
58 /// Detect language from a file path using only built-in rules (no user config).
59 ///
60 /// Used by `from_file()` (the legacy constructor) and for virtual buffer names
61 /// where user config doesn't apply.
62 pub fn from_path_builtin(path: &Path, registry: &GrammarRegistry) -> Self {
63 let highlighter = HighlightEngine::for_file(path, registry);
64 let ts_language = Language::from_path(path);
65 let name = ts_language
66 .as_ref()
67 .map(|l| l.to_string())
68 .unwrap_or_else(|| "text".to_string());
69 Self {
70 name,
71 highlighter,
72 ts_language,
73 }
74 }
75
76 /// Set language by syntax name (user selected from the language palette).
77 ///
78 /// Looks up the syntax in the grammar registry and optionally finds a
79 /// tree-sitter language for enhanced features. The `languages` config is used
80 /// to resolve the canonical language ID (e.g., "Rust" syntax → "rust" config key).
81 /// Returns `None` if the syntax name is not found in the registry.
82 pub fn from_syntax_name(
83 name: &str,
84 registry: &GrammarRegistry,
85 languages: &HashMap<String, LanguageConfig>,
86 ) -> Option<Self> {
87 if registry.find_syntax_by_name(name).is_some() {
88 let ts_language = Language::from_name(name);
89 let highlighter = HighlightEngine::for_syntax_name(name, registry, ts_language);
90 // Resolve the canonical language ID from config (e.g., "Rust" → "rust").
91 let language_id =
92 resolve_language_id(name, registry, languages).unwrap_or_else(|| name.to_string());
93 Some(Self {
94 name: language_id,
95 highlighter,
96 ts_language,
97 })
98 } else {
99 None
100 }
101 }
102
103 /// Plain text — no highlighting.
104 pub fn plain_text() -> Self {
105 Self {
106 name: "text".to_string(),
107 highlighter: HighlightEngine::None,
108 ts_language: None,
109 }
110 }
111
112 /// Detect language from a virtual buffer name like `*OLD:test.ts*` or `*OURS*.c`.
113 ///
114 /// Strips surrounding `*` characters and extracts the filename after any
115 /// prefix like "OLD:" or "NEW:".
116 pub fn from_virtual_name(name: &str, registry: &GrammarRegistry) -> Self {
117 let cleaned = name.trim_matches('*');
118 let filename = if let Some(pos) = cleaned.rfind(':') {
119 &cleaned[pos + 1..]
120 } else {
121 cleaned
122 };
123 Self::from_path_builtin(Path::new(filename), registry)
124 }
125}
126
127/// Resolve a syntect syntax display name to its canonical config language ID.
128///
129/// The config `[languages]` section is the single authoritative registry of
130/// language IDs. Each entry has a `grammar` field that is resolved to a
131/// syntect syntax via `GrammarRegistry::find_syntax_by_name`. This function
132/// performs the reverse lookup: for each config entry, resolve its grammar
133/// through the registry and check whether the resulting syntax matches.
134pub fn resolve_language_id(
135 syntax_name: &str,
136 registry: &GrammarRegistry,
137 languages: &HashMap<String, LanguageConfig>,
138) -> Option<String> {
139 for (lang_id, lang_config) in languages {
140 if let Some(syntax) = registry.find_syntax_by_name(&lang_config.grammar) {
141 if syntax.name == syntax_name {
142 return Some(lang_id.clone());
143 }
144 }
145 }
146 None
147}