Skip to main content

editor_core_lang/
lib.rs

1#![warn(missing_docs)]
2//! `editor-core-lang` - data-driven language configuration helpers for `editor-core`.
3//!
4//! This crate intentionally stays lightweight and does **not** depend on `lsp-types` or any
5//! parsing/highlighting systems. It provides small structs that hosts can use to configure
6//! editor-kernel features in a language-aware way.
7
8use std::collections::BTreeMap;
9use std::path::{Path, PathBuf};
10
/// Comment delimiters for a single language.
///
/// The editor kernel can read these tokens to toggle comments without any UI knowledge.
/// Every field is optional; the derived `Default` leaves all of them `None`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CommentConfig {
    /// Token that starts a line comment (e.g. `//`, `#`).
    pub line: Option<String>,
    /// Token that opens a block comment (e.g. `/*`).
    pub block_start: Option<String>,
    /// Token that closes a block comment (e.g. `*/`).
    pub block_end: Option<String>,
}

impl CommentConfig {
    /// Build a config with a line comment token and no block tokens.
    pub fn line(token: impl Into<String>) -> Self {
        CommentConfig {
            line: Some(token.into()),
            ..Self::default()
        }
    }

    /// Build a config with a block comment token pair and no line token.
    pub fn block(start: impl Into<String>, end: impl Into<String>) -> Self {
        CommentConfig {
            block_start: Some(start.into()),
            block_end: Some(end.into()),
            ..Self::default()
        }
    }

    /// Build a config that carries a line token as well as a block token pair.
    pub fn line_and_block(
        line: impl Into<String>,
        block_start: impl Into<String>,
        block_end: impl Into<String>,
    ) -> Self {
        let line = Some(line.into());
        let block_start = Some(block_start.into());
        let block_end = Some(block_end.into());
        CommentConfig {
            line,
            block_start,
            block_end,
        }
    }

    /// Returns `true` when a non-empty line comment token is set.
    pub fn has_line(&self) -> bool {
        Self::is_usable(self.line.as_deref())
    }

    /// Returns `true` when the block start and block end tokens are both set and non-empty.
    pub fn has_block(&self) -> bool {
        Self::is_usable(self.block_start.as_deref()) && Self::is_usable(self.block_end.as_deref())
    }

    /// A token counts as configured only when it is present and not the empty string.
    fn is_usable(token: Option<&str>) -> bool {
        matches!(token, Some(text) if !text.is_empty())
    }
}
67
/// One auto-pair rule: an opening delimiter together with its closing delimiter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AutoPair {
    /// Character that opens the pair.
    pub open: char,
    /// Character that closes the pair.
    pub close: char,
}

impl AutoPair {
    /// Build a pair from its opening and closing characters.
    ///
    /// This is a `const fn`, so pairs can be defined in constants and statics.
    pub const fn new(open: char, close: char) -> Self {
        AutoPair { open, close }
    }
}
83
84/// Auto-pairs configuration (auto-close, wrap selection, skip-over, delete-pair).
85///
86/// Notes:
87/// - This mirrors `editor-core`'s auto-pairs behavior, but stays in this crate to avoid a cyclic
88///   dependency (`editor-core` depends on `editor-core-lang`).
89#[derive(Debug, Clone, PartialEq, Eq)]
90pub struct AutoPairsConfig {
91    /// Master enable switch for auto-pairs behaviors.
92    pub enabled: bool,
93    /// Configured delimiter pairs (order matters when overlapping; first match wins).
94    pub pairs: Vec<AutoPair>,
95    /// When typing an opening delimiter over a non-empty selection, wrap the selection.
96    pub wrap_selection: bool,
97    /// When typing a closing delimiter and the next character matches, skip over it instead of inserting.
98    pub skip_over_closing: bool,
99    /// When backspacing/deleting adjacent matching delimiters, delete both.
100    pub delete_pair: bool,
101}
102
103impl Default for AutoPairsConfig {
104    fn default() -> Self {
105        Self {
106            enabled: false,
107            pairs: vec![
108                AutoPair::new('(', ')'),
109                AutoPair::new('[', ']'),
110                AutoPair::new('{', '}'),
111                AutoPair::new('"', '"'),
112                AutoPair::new('\'', '\''),
113                AutoPair::new('`', '`'),
114            ],
115            wrap_selection: true,
116            skip_over_closing: true,
117            delete_pair: true,
118        }
119    }
120}
121
/// Identifier of a language configuration entry (e.g. `"rust"`, `"python"`).
///
/// A thin newtype over `String`; ordering, equality and hashing are those of the wrapped string.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LanguageId(String);

impl LanguageId {
    /// Wrap anything convertible into a `String` as a language id.
    pub fn new(id: impl Into<String>) -> Self {
        let raw: String = id.into();
        LanguageId(raw)
    }

    /// View the id as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl From<&str> for LanguageId {
    fn from(value: &str) -> Self {
        LanguageId(value.to_owned())
    }
}

impl From<String> for LanguageId {
    fn from(value: String) -> Self {
        LanguageId(value)
    }
}
149
/// The indentation unit a language prefers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndentStyle {
    /// Indent with literal `\t` characters.
    Tabs,
    /// Indent with spaces; the payload is the width of one indent level (e.g. 2 or 4).
    Spaces(u8),
}

impl Default for IndentStyle {
    /// Four spaces per indent level.
    fn default() -> Self {
        const DEFAULT_WIDTH: u8 = 4;
        IndentStyle::Spaces(DEFAULT_WIDTH)
    }
}
164
165/// Indentation configuration for a language.
166///
167/// Notes:
168/// - This is **configuration only**; applying language-aware indentation still depends on the
169///   host/editor policy layer.
170#[derive(Debug, Clone, PartialEq, Eq)]
171pub struct IndentationConfig {
172    /// Preferred indent style for this language.
173    pub style: IndentStyle,
174
175    /// Characters that typically introduce a new indentation level when they appear before an
176    /// inserted newline (e.g. `{`, `:` in Python, etc.).
177    pub indent_triggers: Vec<char>,
178
179    /// Characters that typically decrease indentation (e.g. `}`, `]`, `)`).
180    pub outdent_triggers: Vec<char>,
181}
182
183impl Default for IndentationConfig {
184    fn default() -> Self {
185        Self {
186            style: IndentStyle::default(),
187            indent_triggers: vec!['{', '[', '(', ':'],
188            outdent_triggers: vec!['}', ']', ')'],
189        }
190    }
191}
192
/// Per-language word-boundary settings for editor "word" operations.
///
/// The value is meant to be passed to
/// `editor_core::WordBoundaryConfig::set_ascii_boundary_chars` (comparable in spirit to VSCode's
/// `wordSeparators`).
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct WordBoundaryLanguageConfig {
    /// ASCII characters to treat as word boundaries, overriding the defaults (whitespace is
    /// always a boundary).
    ///
    /// `None` means the host should keep the editor-core defaults (identifier-like words).
    pub ascii_boundary_chars: Option<String>,
}
204
/// Tree-sitter settings for a language.
///
/// This is **data only**:
/// - No tree-sitter library is pulled in by this crate.
/// - Turning `query_pack_id` into actual `.scm` query text is the job of the host (or of an
///   integration crate).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeSitterLanguageConfig {
    /// Name of the Tree-sitter grammar (e.g. `"rust"`).
    pub grammar: String,

    /// Id of the query pack from which highlights/folds queries are resolved (e.g. `"rust"`).
    pub query_pack_id: String,

    /// Whether the host should turn Tree-sitter on for this language by default.
    pub enabled_by_default: bool,
}
221
/// LSP launch configuration for a language.
///
/// This is intentionally lightweight and does not embed `lsp-types` models.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LspLanguageConfig {
    /// LSP `languageId` string (e.g. `"rust"`, `"python"`).
    pub language_id: String,
    /// LSP server command (e.g. `"rust-analyzer"`).
    pub command: String,
    /// Arguments passed to the LSP server (no shell quoting).
    pub args: Vec<String>,

    /// Workspace root detection markers. When any marker exists in an ancestor directory of the
    /// opened file, that directory becomes the LSP root.
    ///
    /// Markers can be files or directories (e.g. `"Cargo.toml"`, `".git"`). Empty strings are
    /// ignored during detection.
    pub root_markers: Vec<String>,
}

impl LspLanguageConfig {
    /// Try to detect a workspace root directory for a given file path.
    ///
    /// When `file_path` is an existing directory the search starts there; otherwise it starts at
    /// the path's parent.
    ///
    /// Returns `None` when the path has no parent or when no configured marker was found.
    pub fn detect_root_dir(&self, file_path: &Path) -> Option<PathBuf> {
        let start_dir = if file_path.is_dir() {
            file_path
        } else {
            file_path.parent()?
        };
        self.detect_root_dir_from_dir(start_dir)
    }

    /// Try to detect a workspace root directory starting from a directory.
    ///
    /// Checks `start_dir` first and then each of its ancestors, nearest first, and returns the
    /// first directory that contains at least one of `root_markers`. The path is used as given;
    /// it is not canonicalized.
    ///
    /// Returns `None` when no configured markers were found.
    pub fn detect_root_dir_from_dir(&self, start_dir: &Path) -> Option<PathBuf> {
        start_dir
            .ancestors()
            .find(|dir| self.has_root_marker(dir))
            .map(Path::to_path_buf)
    }

    /// Returns `true` when `dir` contains at least one non-empty configured marker.
    fn has_root_marker(&self, dir: &Path) -> bool {
        self.root_markers
            .iter()
            // Joining an empty marker yields `dir` itself, so an empty entry would turn every
            // existing directory into a "root". Such entries carry no information; skip them.
            .filter(|marker| !marker.is_empty())
            .any(|marker| dir.join(marker).exists())
    }
}
272
273/// Unified per-language configuration.
274#[derive(Debug, Clone, PartialEq, Eq)]
275pub struct LanguageConfig {
276    /// Stable language id (e.g. `"rust"`).
277    pub id: LanguageId,
278    /// Human-friendly name (e.g. `"Rust"`).
279    pub display_name: String,
280
281    /// File extensions matched by this language (lowercase, without the leading dot).
282    pub file_extensions: Vec<String>,
283    /// Exact file names matched by this language (case-sensitive).
284    pub file_names: Vec<String>,
285
286    /// Comment tokens.
287    pub comments: CommentConfig,
288    /// Auto-pairs rules (typically applied via `EditCommand::TypeChar`).
289    pub auto_pairs: AutoPairsConfig,
290    /// Indentation preferences and trigger chars.
291    pub indentation: IndentationConfig,
292    /// Word boundary override (for word movement/selection).
293    pub word_boundary: WordBoundaryLanguageConfig,
294
295    /// Optional Tree-sitter config.
296    pub treesitter: Option<TreeSitterLanguageConfig>,
297    /// Optional LSP config.
298    pub lsp: Option<LspLanguageConfig>,
299
300    /// Free-form extra settings for hosts (UI-specific knobs, etc).
301    ///
302    /// This is intentionally not interpreted by the workspace crates.
303    pub extra: BTreeMap<String, String>,
304}
305
306impl LanguageConfig {
307    /// Create a minimal language config.
308    pub fn new(id: impl Into<LanguageId>, display_name: impl Into<String>) -> Self {
309        Self {
310            id: id.into(),
311            display_name: display_name.into(),
312            file_extensions: Vec::new(),
313            file_names: Vec::new(),
314            comments: CommentConfig::default(),
315            auto_pairs: AutoPairsConfig::default(),
316            indentation: IndentationConfig::default(),
317            word_boundary: WordBoundaryLanguageConfig::default(),
318            treesitter: None,
319            lsp: None,
320            extra: BTreeMap::new(),
321        }
322    }
323
324    /// Add a file extension match (without the leading dot).
325    pub fn with_extension(mut self, ext: impl Into<String>) -> Self {
326        self.file_extensions.push(ext.into());
327        self
328    }
329
330    /// Add an exact file name match.
331    pub fn with_file_name(mut self, name: impl Into<String>) -> Self {
332        self.file_names.push(name.into());
333        self
334    }
335
336    /// Returns `true` if this language config matches the given path (file name or extension).
337    pub fn matches_path(&self, path: &Path) -> bool {
338        if let Some(name) = path.file_name().and_then(|n| n.to_str())
339            && self.file_names.iter().any(|x| x == name)
340        {
341            return true;
342        }
343
344        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
345            let ext = ext.to_ascii_lowercase();
346            if self
347                .file_extensions
348                .iter()
349                .any(|x| x.to_ascii_lowercase() == ext)
350            {
351                return true;
352            }
353        }
354
355        false
356    }
357}
358
/// Failure reported by [`LanguageRegistry::register`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LanguageRegistryError {
    /// The registry already holds a language with this id; the payload is the offending id.
    DuplicateLanguageId(String),
}

impl std::fmt::Display for LanguageRegistryError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Irrefutable while the enum has a single variant; adding a variant makes this line fail
        // to compile, which forces the message for the new case to be written.
        let Self::DuplicateLanguageId(id) = self;
        write!(f, "duplicate language id: {}", id)
    }
}

impl std::error::Error for LanguageRegistryError {}
375
376/// A lightweight in-memory registry of language configurations.
377#[derive(Debug, Clone)]
378pub struct LanguageRegistry {
379    languages: Vec<LanguageConfig>,
380}
381
382impl LanguageRegistry {
383    /// Create an empty registry.
384    pub fn new() -> Self {
385        Self {
386            languages: Vec::new(),
387        }
388    }
389
390    /// Register a language config.
391    pub fn register(&mut self, lang: LanguageConfig) -> Result<(), LanguageRegistryError> {
392        if self
393            .languages
394            .iter()
395            .any(|l| l.id.as_str() == lang.id.as_str())
396        {
397            return Err(LanguageRegistryError::DuplicateLanguageId(
398                lang.id.as_str().to_string(),
399            ));
400        }
401        self.languages.push(lang);
402        Ok(())
403    }
404
405    /// Return all registered languages.
406    pub fn languages(&self) -> &[LanguageConfig] {
407        &self.languages
408    }
409
410    /// Find a language config by its id.
411    pub fn by_id(&self, id: &str) -> Option<&LanguageConfig> {
412        self.languages.iter().find(|l| l.id.as_str() == id)
413    }
414
415    /// Find a language config for a file path.
416    pub fn language_for_path(&self, path: &Path) -> Option<&LanguageConfig> {
417        self.languages.iter().find(|l| l.matches_path(path))
418    }
419}
420
421impl Default for LanguageConfig {
422    fn default() -> Self {
423        Self::new("plain-text", "Plain Text")
424    }
425}
426
427impl Default for LanguageRegistry {
428    fn default() -> Self {
429        let mut reg = Self::new();
430
431        // Rust.
432        let mut rust = LanguageConfig::new("rust", "Rust").with_extension("rs");
433        rust.comments = CommentConfig::line_and_block("//", "/*", "*/");
434        rust.lsp = Some(LspLanguageConfig {
435            language_id: "rust".to_string(),
436            command: "rust-analyzer".to_string(),
437            args: Vec::new(),
438            root_markers: vec!["Cargo.toml".to_string(), ".git".to_string()],
439        });
440        rust.treesitter = Some(TreeSitterLanguageConfig {
441            grammar: "rust".to_string(),
442            query_pack_id: "rust".to_string(),
443            enabled_by_default: true,
444        });
445        // Enable auto-pairs by default for code-like languages, but keep it disabled at the
446        // kernel boundary unless the host chooses to enable it.
447        rust.auto_pairs.enabled = true;
448        let _ = reg.register(rust);
449
450        // TOML.
451        let mut toml = LanguageConfig::new("toml", "TOML").with_extension("toml");
452        toml.file_names.push("Cargo.toml".to_string());
453        toml.comments = CommentConfig::line("#");
454        toml.auto_pairs.enabled = true;
455        let _ = reg.register(toml);
456
457        // Markdown.
458        let mut md = LanguageConfig::new("markdown", "Markdown")
459            .with_extension("md")
460            .with_extension("markdown");
461        md.comments = CommentConfig::block("<!--", "-->");
462        md.auto_pairs.enabled = true;
463        let _ = reg.register(md);
464
465        // JSON (no native comments).
466        let mut json = LanguageConfig::new("json", "JSON").with_extension("json");
467        json.comments = CommentConfig::default();
468        json.auto_pairs.enabled = true;
469        let _ = reg.register(json);
470
471        reg
472    }
473}