Skip to main content

ndg_commonmark/syntax/
syntastica.rs

//! Syntastica-based syntax highlighting backend.
//!
//! This module provides a modern tree-sitter based syntax highlighter using the
//! Syntastica library, which offers excellent language support including native
//! Nix highlighting.
//!
//! ## Theme Support
//!
//! We programmatically load all available themes from `syntastica-themes`.
//! Some of the popular themes included are:
//!
//! - github (dark/light variants)
//! - gruvbox (dark/light)
//! - nord, dracula, catppuccin
//! - tokyo night, solarized, monokai
//! - And many more...
17
18use std::{
19  collections::HashMap,
20  fs,
21  path::{Path, PathBuf},
22  sync::Mutex,
23};
24
25use syntastica::{
26  Processor,
27  language_set::{HighlightConfiguration, LanguageSet},
28  render,
29  renderer::HtmlRenderer,
30};
31use syntastica_core::theme::ResolvedTheme;
32use syntastica_parsers::Lang;
33use syntastica_query_preprocessor::{
34  process_highlights,
35  process_injections,
36  process_locals,
37};
38
39use super::{
40  error::{SyntaxError, SyntaxResult},
41  types::{SyntaxConfig, SyntaxHighlighter, SyntaxManager},
42};
43
44/// Syntastica-based syntax highlighter.
45pub struct SyntasticaHighlighter {
46  themes:        HashMap<String, ResolvedTheme>,
47  default_theme: ResolvedTheme,
48  processor:     Mutex<Processor<'static, UserQueryLanguageSet>>,
49  renderer:      Mutex<HtmlRenderer>,
50}
51
52struct UserQueryLanguageSet {
53  configs:            Mutex<HashMap<Lang, &'static HighlightConfiguration>>,
54  syntax_queries_dir: Option<PathBuf>,
55}
56
57impl UserQueryLanguageSet {
58  fn new(syntax_queries_dir: Option<&Path>) -> Self {
59    Self {
60      configs:            Mutex::new(HashMap::new()),
61      syntax_queries_dir: syntax_queries_dir.map(Path::to_path_buf),
62    }
63  }
64
65  fn config_for(
66    &self,
67    lang: Lang,
68  ) -> syntastica::Result<&'static HighlightConfiguration> {
69    if let Some(config) = self
70      .configs
71      .lock()
72      .map_err(|e| {
73        syntastica::Error::UnsupportedLanguage(format!(
74          "syntax language-set lock poisoned: {e}"
75        ))
76      })?
77      .get(&lang)
78      .copied()
79    {
80      return Ok(config);
81    }
82
83    let config =
84      build_highlight_config(lang, self.syntax_queries_dir.as_deref())
85        .map_err(|e| syntastica::Error::UnsupportedLanguage(e.to_string()))?;
86
87    let mut configs = self.configs.lock().map_err(|e| {
88      syntastica::Error::UnsupportedLanguage(format!(
89        "syntax language-set lock poisoned: {e}"
90      ))
91    })?;
92
93    if let Some(config) = configs.get(&lang).copied() {
94      return Ok(config);
95    }
96
97    let config = Box::leak(Box::new(config));
98    configs.insert(lang, config);
99
100    Ok(config)
101  }
102}
103
104fn build_highlight_config(
105  lang: Lang,
106  syntax_queries_dir: Option<&Path>,
107) -> SyntaxResult<HighlightConfiguration> {
108  let mut highlights_query = lang.highlights_query().to_string();
109  let mut injections_query = lang.injections_query().to_string();
110  let mut locals_query = lang.locals_query().to_string();
111
112  if let Some(base_dir) = syntax_queries_dir {
113    if let Some(query) = read_user_query(base_dir, lang, "highlights.scm")? {
114      let extends = is_extends_query(&query);
115      let processed =
116        process_highlights("", true, &rewrite_any_of_predicates(&query));
117      if extends {
118        highlights_query = format!("{highlights_query}\n{processed}");
119      } else {
120        highlights_query = processed;
121      }
122    }
123
124    if let Some(query) = read_user_query(base_dir, lang, "injections.scm")? {
125      let extends = is_extends_query(&query);
126      let processed =
127        process_injections("", true, &rewrite_any_of_predicates(&query));
128      if extends {
129        injections_query = format!("{injections_query}\n{processed}");
130      } else {
131        injections_query = processed;
132      }
133    }
134
135    if let Some(query) = read_user_query(base_dir, lang, "locals.scm")? {
136      let extends = is_extends_query(&query);
137      let processed =
138        process_locals("", true, &rewrite_any_of_predicates(&query));
139      if extends {
140        locals_query = format!("{locals_query}\n{processed}");
141      } else {
142        locals_query = processed;
143      }
144    }
145  }
146
147  let mut config = HighlightConfiguration::new(
148    lang.get(),
149    <&str>::from(lang),
150    &highlights_query,
151    &injections_query,
152    &locals_query,
153  )
154  .map_err(|e| {
155    SyntaxError::BackendError(format!(
156      "failed to build highlight config for '{}': {e}",
157      <&str>::from(lang)
158    ))
159  })?;
160  config.configure(syntastica::theme::THEME_KEYS);
161  Ok(config)
162}
163
164impl<'s> LanguageSet<'s> for UserQueryLanguageSet {
165  type Language = Lang;
166
167  fn get_language(
168    &self,
169    language: Self::Language,
170  ) -> syntastica::Result<&HighlightConfiguration> {
171    self.config_for(language)
172  }
173}
174
/// Reports whether a query file asks to extend the bundled query rather than
/// replace it.
///
/// Only the first line is inspected. After trimming surrounding whitespace it
/// must be exactly `;; extends` or `;;extends`; anything else, including empty
/// input, yields `false`.
fn is_extends_query(content: &str) -> bool {
  match content.lines().next() {
    Some(first_line) => {
      let directive = first_line.trim();
      directive == ";; extends" || directive == ";;extends"
    },
    None => false,
  }
}
182
183/// Rewrites `(#any-of? @cap "a" "b" ...)` into `(#match? @cap "^(a|b|...)$")`.
184///
185/// nvim-treesitter's `#any-of?` is a Lua-backed predicate with no standard
186/// tree-sitter equivalent. The rewrite preserves the same semantics using the
187/// `#match?` predicate that tree-sitter-highlight natively supports.
188fn rewrite_any_of_predicates(query: &str) -> String {
189  const NEEDLE: &str = "#any-of?";
190  let mut result = String::with_capacity(query.len());
191  let mut remaining = query;
192
193  loop {
194    match remaining.find(NEEDLE) {
195      None => {
196        result.push_str(remaining);
197        break;
198      },
199      Some(pos) => {
200        result.push_str(&remaining[..pos]);
201        let from = &remaining[pos..];
202        match parse_any_of_predicate(from) {
203          Some((replacement, consumed)) => {
204            result.push_str(&replacement);
205            remaining = &from[consumed..];
206          },
207          None => {
208            result.push_str(NEEDLE);
209            remaining = &from[NEEDLE.len()..];
210          },
211        }
212      },
213    }
214  }
215
216  result
217}
218
219fn parse_any_of_predicate(s: &str) -> Option<(String, usize)> {
220  const NEEDLE: &str = "#any-of?";
221  let mut pos = NEEDLE.len();
222
223  let skip_ws = |p: usize| p + s[p..].len() - s[p..].trim_start().len();
224
225  pos = skip_ws(pos);
226
227  if !s[pos..].starts_with('@') {
228    return None;
229  }
230
231  let cap_start = pos;
232  pos += 1;
233  while pos < s.len() {
234    let b = s.as_bytes()[pos];
235    if b.is_ascii_whitespace() || b == b')' {
236      break;
237    }
238    pos += 1;
239  }
240  let capture_name = &s[cap_start..pos];
241
242  pos = skip_ws(pos);
243
244  let mut values: Vec<&str> = Vec::new();
245  while pos < s.len() && s.as_bytes()[pos] == b'"' {
246    pos += 1;
247    let val_start = pos;
248    while pos < s.len() && s.as_bytes()[pos] != b'"' {
249      if s.as_bytes()[pos] == b'\\' {
250        pos += 1;
251      }
252      pos += 1;
253    }
254    if pos >= s.len() {
255      return None;
256    }
257    values.push(&s[val_start..pos]);
258    pos += 1;
259    pos = skip_ws(pos);
260  }
261
262  if values.is_empty() {
263    return None;
264  }
265
266  let pattern = format!(
267    "^({})$",
268    values
269      .iter()
270      .map(|v| ts_regex_escape(v))
271      .collect::<Vec<_>>()
272      .join("|")
273  );
274  Some((format!("#match? {capture_name} \"{pattern}\""), pos))
275}
276
/// Backslash-escapes the regex metacharacters
/// `. * + ? ^ $ { } [ ] | ( ) \` in `s`; every other character is copied
/// unchanged.
///
/// NOTE(review): `parse_any_of_predicate` embeds the result inside a query
/// string literal. Confirm whether the query parser's own backslash handling
/// requires these backslashes to be doubled for them to reach the regex.
fn ts_regex_escape(s: &str) -> String {
  const METACHARACTERS: &str = ".*+?^${}[]|()\\";

  s.chars()
    .fold(String::with_capacity(s.len()), |mut escaped, ch| {
      if METACHARACTERS.contains(ch) {
        escaped.push('\\');
      }
      escaped.push(ch);
      escaped
    })
}
303
304fn read_user_query(
305  base_dir: &Path,
306  lang: Lang,
307  file_name: &str,
308) -> SyntaxResult<Option<String>> {
309  let query_path = query_path_for_lang(base_dir, lang, file_name);
310  if !query_path.exists() {
311    return Ok(None);
312  }
313
314  fs::read_to_string(&query_path).map(Some).map_err(|e| {
315    SyntaxError::BackendError(format!(
316      "failed to read query override '{}': {e}",
317      query_path.display()
318    ))
319  })
320}
321
322fn query_path_for_lang(
323  base_dir: &Path,
324  lang: Lang,
325  file_name: &str,
326) -> PathBuf {
327  base_dir.join(<&str>::from(lang)).join(file_name)
328}
329
330impl SyntasticaHighlighter {
331  /// Create a new Syntastica highlighter with all available themes.
332  ///
333  /// # Errors
334  ///
335  /// Currently never returns an error, but returns a Result for API
336  /// consistency.
337  pub fn new(syntax_queries_dir: Option<&Path>) -> SyntaxResult<Self> {
338    let mut themes = HashMap::new();
339
340    // Load all available themes
341    for theme_name in syntastica_themes::THEMES {
342      if let Some(theme) = syntastica_themes::from_str(theme_name) {
343        themes.insert((*theme_name).to_string(), theme);
344      }
345    }
346
347    let default_theme = syntastica_themes::one::dark();
348
349    // Leak the language set into a `'static` reference so the `Processor` can
350    // hold it for the remainder of the process lifetime. This is sound for a
351    // CLI: the process exits when documentation generation completes and the OS
352    // reclaims the memory. It avoids the unsound lifetime fabrication that a
353    // raw-pointer cast would require.
354    let language_set_static: &'static UserQueryLanguageSet =
355      Box::leak(Box::new(UserQueryLanguageSet::new(syntax_queries_dir)));
356    let processor = Processor::new(language_set_static);
357
358    Ok(Self {
359      themes,
360      default_theme,
361      processor: Mutex::new(processor),
362      renderer: Mutex::new(HtmlRenderer::new()),
363    })
364  }
365
366  /// Add a custom theme
367  pub fn add_theme(&mut self, name: String, theme: ResolvedTheme) {
368    self.themes.insert(name, theme);
369  }
370
371  /// Set the default theme
372  pub fn set_default_theme(&mut self, theme: ResolvedTheme) {
373    self.default_theme = theme;
374  }
375
376  /// Convert a language string to a Lang enum
377  fn parse_language(language: &str) -> Option<Lang> {
378    match language.to_lowercase().as_str() {
379      "rust" | "rs" => Some(Lang::Rust),
380      "python" | "py" => Some(Lang::Python),
381      "javascript" | "js" => Some(Lang::Javascript),
382      "typescript" | "ts" => Some(Lang::Typescript),
383      "tsx" => Some(Lang::Tsx),
384      "nix" => Some(Lang::Nix),
385      "bash" | "sh" | "shell" => Some(Lang::Bash),
386      "c" => Some(Lang::C),
387      "cpp" | "c++" | "cxx" => Some(Lang::Cpp),
388      "c_sharp" | "csharp" | "cs" => Some(Lang::CSharp),
389      "go" => Some(Lang::Go),
390      "java" => Some(Lang::Java),
391      "json" => Some(Lang::Json),
392      "yaml" | "yml" => Some(Lang::Yaml),
393      "html" => Some(Lang::Html),
394      "css" => Some(Lang::Css),
395      "markdown" | "md" => Some(Lang::Markdown),
396      "markdown_inline" => Some(Lang::MarkdownInline),
397      "sql" => Some(Lang::Sql),
398      "lua" => Some(Lang::Lua),
399      "ruby" | "rb" => Some(Lang::Ruby),
400      "php" => Some(Lang::Php),
401      "php_only" => Some(Lang::PhpOnly),
402      "haskell" | "hs" => Some(Lang::Haskell),
403      "scala" => Some(Lang::Scala),
404      "swift" => Some(Lang::Swift),
405      "makefile" | "make" => Some(Lang::Make),
406      "cmake" => Some(Lang::Cmake),
407      "asm" | "assembly" => Some(Lang::Asm),
408      "diff" | "patch" => Some(Lang::Diff),
409      "elixir" | "ex" | "exs" => Some(Lang::Elixir),
410      "jsdoc" => Some(Lang::Jsdoc),
411      "printf" => Some(Lang::Printf),
412      "regex" | "regexp" => Some(Lang::Regex),
413      "zig" => Some(Lang::Zig),
414      #[allow(clippy::match_same_arms, reason = "Explicit for documentation")]
415      "text" | "txt" | "plain" => None, // use fallback for plain text
416      _ => None,
417    }
418  }
419
420  /// Get the theme by name, falling back to default
421  fn get_theme(&self, theme_name: Option<&str>) -> &ResolvedTheme {
422    theme_name
423      .and_then(|name| self.themes.get(name))
424      .unwrap_or(&self.default_theme)
425  }
426}
427
428impl SyntaxHighlighter for SyntasticaHighlighter {
429  fn name(&self) -> &'static str {
430    "Syntastica"
431  }
432
433  fn supported_languages(&self) -> Vec<String> {
434    vec![
435      "rust",
436      "rs",
437      "python",
438      "py",
439      "javascript",
440      "js",
441      "typescript",
442      "ts",
443      "tsx",
444      "nix",
445      "bash",
446      "sh",
447      "shell",
448      "c",
449      "cpp",
450      "c++",
451      "cxx",
452      "c_sharp",
453      "csharp",
454      "cs",
455      "go",
456      "java",
457      "json",
458      "yaml",
459      "yml",
460      "html",
461      "css",
462      "markdown",
463      "md",
464      "markdown_inline",
465      "sql",
466      "lua",
467      "ruby",
468      "rb",
469      "php",
470      "php_only",
471      "haskell",
472      "hs",
473      "scala",
474      "swift",
475      "makefile",
476      "make",
477      "cmake",
478      "asm",
479      "assembly",
480      "diff",
481      "patch",
482      "elixir",
483      "ex",
484      "exs",
485      "jsdoc",
486      "printf",
487      "regex",
488      "regexp",
489      "zig",
490      "text",
491      "txt",
492      "plain",
493    ]
494    .into_iter()
495    .map(String::from)
496    .collect()
497  }
498
499  fn available_themes(&self) -> Vec<String> {
500    let mut themes: Vec<String> = self.themes.keys().cloned().collect();
501    themes.sort();
502    themes
503  }
504
505  fn highlight(
506    &self,
507    code: &str,
508    language: &str,
509    theme: Option<&str>,
510  ) -> SyntaxResult<String> {
511    let lang = Self::parse_language(language)
512      .ok_or_else(|| SyntaxError::UnsupportedLanguage(language.to_string()))?;
513
514    let theme = self.get_theme(theme);
515
516    // Use the reusable processor via Mutex for thread-safe interior mutability
517    let highlights = self
518      .processor
519      .lock()
520      .map_err(|e| {
521        SyntaxError::HighlightingFailed(format!("Processor lock poisoned: {e}"))
522      })?
523      .process(code, lang)
524      .map_err(|e| SyntaxError::HighlightingFailed(e.to_string()))?;
525
526    // Use the reusable renderer via Mutex for thread-safe interior mutability
527    let html = {
528      let mut renderer = self.renderer.lock().map_err(|e| {
529        SyntaxError::HighlightingFailed(format!("Renderer lock poisoned: {e}"))
530      })?;
531      render(&highlights, &mut *renderer, theme)
532    };
533
534    Ok(html)
535  }
536
537  fn language_from_extension(&self, extension: &str) -> Option<String> {
538    match extension.to_lowercase().as_str() {
539      "rs" => Some("rust".to_string()),
540      "py" | "pyw" => Some("python".to_string()),
541      "js" | "mjs" => Some("javascript".to_string()),
542      "ts" => Some("typescript".to_string()),
543      "tsx" => Some("tsx".to_string()),
544      "nix" => Some("nix".to_string()),
545      "sh" | "bash" | "zsh" | "fish" => Some("bash".to_string()),
546      "c" | "h" => Some("c".to_string()),
547      "cpp" | "cxx" | "cc" | "hpp" | "hxx" | "hh" => Some("cpp".to_string()),
548      "cs" => Some("c_sharp".to_string()),
549      "go" => Some("go".to_string()),
550      "java" => Some("java".to_string()),
551      "json" => Some("json".to_string()),
552      "yaml" | "yml" => Some("yaml".to_string()),
553      "html" | "htm" => Some("html".to_string()),
554      "css" => Some("css".to_string()),
555      "md" | "markdown" => Some("markdown".to_string()),
556      "sql" => Some("sql".to_string()),
557      "lua" => Some("lua".to_string()),
558      "rb" => Some("ruby".to_string()),
559      "php" => Some("php".to_string()),
560      "hs" => Some("haskell".to_string()),
561      "ml" | "mli" => Some("ocaml".to_string()),
562      "scala" => Some("scala".to_string()),
563      "swift" => Some("swift".to_string()),
564      "s" | "asm" => Some("asm".to_string()),
565      "diff" | "patch" => Some("diff".to_string()),
566      "ex" | "exs" => Some("elixir".to_string()),
567      "zig" => Some("zig".to_string()),
568      "txt" => Some("text".to_string()),
569      _ => None,
570    }
571  }
572}
573
574/// Create a Syntastica-based syntax manager with default configuration.
575///
576/// Syntastica provides modern tree-sitter based syntax highlighting with
577/// excellent language support including native Nix highlighting.
578///
579/// # Errors
580///
581/// Returns an error if the Syntastica highlighter fails to initialize.
582pub fn create_syntastica_manager(
583  syntax_queries_dir: Option<&Path>,
584) -> SyntaxResult<SyntaxManager> {
585  let highlighter = Box::new(SyntasticaHighlighter::new(syntax_queries_dir)?);
586  let config = SyntaxConfig {
587    default_theme: Some("one-dark".to_string()),
588    ..Default::default()
589  };
590  Ok(SyntaxManager::new(highlighter, config))
591}
592
#[cfg(test)]
mod tests {
  use super::*;

  /// Only a leading `;; extends` / `;;extends` line counts as the directive.
  #[test]
  fn test_is_extends_query() {
    for directive in [";; extends\n(foo) @bar", ";;extends\n(foo) @bar"] {
      assert!(is_extends_query(directive));
    }

    // A single semicolon is an ordinary comment, not the directive.
    for other in ["(foo) @bar", "", "; extends"] {
      assert!(!is_extends_query(other));
    }
  }

  /// A single predicate is turned into an anchored alternation.
  #[test]
  fn test_rewrite_any_of_basic() {
    let rewritten = rewrite_any_of_predicates(
      r#"((identifier) @_name (#any-of? @_name "foo" "bar"))"#,
    );

    for expected in ["#match?", "@_name", "^(foo|bar)$"] {
      assert!(rewritten.contains(expected));
    }
    assert!(!rewritten.contains("#any-of?"));
  }

  /// Every predicate in the query is rewritten independently.
  #[test]
  fn test_rewrite_any_of_multiple() {
    let source = r#"
      ((identifier) @a (#any-of? @a "x" "y"))
      ((identifier) @b (#any-of? @b "p" "q" "r"))
    "#;
    let rewritten = rewrite_any_of_predicates(source);

    assert_eq!(rewritten.matches("#match?").count(), 2);
    assert!(!rewritten.contains("#any-of?"));
    assert!(rewritten.contains("^(x|y)$"));
    assert!(rewritten.contains("^(p|q|r)$"));
  }

  /// Regex metacharacters inside values are escaped.
  #[test]
  fn test_rewrite_any_of_regex_escaping() {
    let rewritten = rewrite_any_of_predicates(
      r#"((identifier) @a (#any-of? @a "foo.bar" "baz"))"#,
    );

    assert!(rewritten.contains("foo\\.bar"));
  }

  /// Queries without `#any-of?` come back unchanged.
  #[test]
  fn test_rewrite_any_of_no_match_passthrough() {
    let source = "(foo) @bar (#eq? @bar \"baz\")";

    assert_eq!(source, rewrite_any_of_predicates(source));
  }

  /// Matches the actual query from nvf's nix.nix.
  #[test]
  fn test_rewrite_any_of_nvf_nix_query() {
    let source = r#"
;; extends

((apply_expression
  function: (variable_expression
    name: (identifier) @_func
    (#any-of? @_func "mkLuaInline" "entryAnywhere"))
  argument: (indented_string_expression
    (string_fragment) @injection.content))
(#set! injection.language "lua")
(#set! injection.combined))
"#;
    let rewritten = rewrite_any_of_predicates(source);

    assert!(!rewritten.contains("#any-of?"));
    assert!(
      rewritten.contains("#match? @_func \"^(mkLuaInline|entryAnywhere)$\"")
    );

    // Everything that is not an `#any-of?` predicate must be preserved.
    for preserved in [
      "#set! injection.language",
      "#set! injection.combined",
      ";; extends",
    ] {
      assert!(rewritten.contains(preserved));
    }
  }
}