Skip to main content

ndg_commonmark/syntax/
syntastica.rs

1//! Syntastica-based syntax highlighting backend.
2//!
3//! This module provides a modern tree-sitter based syntax highlighter using the
4//! Syntastica library, which offers excellent language support including native
5//! Nix highlighting.
6//!
7//! ## Theme Support
8//!
//! We programmatically load all available themes from `syntastica-themes`.
//! Some of the popular themes included are:
11//!
12//! - github (dark/light variants)
13//! - gruvbox (dark/light)
14//! - nord, dracula, catppuccin
15//! - tokyo night, solarized, monokai
16//! - And many more...
17
18use std::{
19  collections::HashMap,
20  fs,
21  path::{Path, PathBuf},
22  sync::Mutex,
23};
24
25use syntastica::{
26  Processor,
27  language_set::{HighlightConfiguration, LanguageSet},
28  render,
29  renderer::HtmlRenderer,
30};
31use syntastica_core::theme::ResolvedTheme;
32use syntastica_parsers::Lang;
33use syntastica_query_preprocessor::{
34  process_highlights,
35  process_injections,
36  process_locals,
37};
38
39use super::{
40  error::{SyntaxError, SyntaxResult},
41  types::{SyntaxConfig, SyntaxHighlighter, SyntaxManager},
42};
43
/// Syntastica-based syntax highlighter.
///
/// Holds a set of named themes plus a fallback theme, and keeps one reusable
/// `Processor` and one reusable `HtmlRenderer`, each behind a `Mutex` so they
/// can be driven from `&self` methods.
pub struct SyntasticaHighlighter {
  /// Themes addressable by name; filled in `new` and extended by `add_theme`.
  themes:        HashMap<String, ResolvedTheme>,
  /// Theme used when no name is requested or the name is not in `themes`.
  default_theme: ResolvedTheme,
  /// Reusable highlighting processor, locked for each `highlight` call.
  processor:     Mutex<Processor<'static, UserQueryLanguageSet>>,
  /// Reusable HTML renderer, locked while rendering.
  renderer:      Mutex<HtmlRenderer>,
}
51
52struct UserQueryLanguageSet {
53  configs:            Mutex<HashMap<Lang, &'static HighlightConfiguration>>,
54  syntax_queries_dir: Option<PathBuf>,
55}
56
57impl UserQueryLanguageSet {
58  fn new(syntax_queries_dir: Option<&Path>) -> Self {
59    Self {
60      configs:            Mutex::new(HashMap::new()),
61      syntax_queries_dir: syntax_queries_dir.map(Path::to_path_buf),
62    }
63  }
64
65  fn config_for(
66    &self,
67    lang: Lang,
68  ) -> syntastica::Result<&'static HighlightConfiguration> {
69    {
70      let configs = self.configs.lock().map_err(|e| {
71        syntastica::Error::UnsupportedLanguage(format!(
72          "syntax language-set lock poisoned: {e}"
73        ))
74      })?;
75
76      if let Some(config) = configs.get(&lang).copied() {
77        return Ok(config);
78      }
79    }
80
81    let config =
82      build_highlight_config(lang, self.syntax_queries_dir.as_deref())
83        .map_err(|e| syntastica::Error::UnsupportedLanguage(e.to_string()))?;
84
85    let mut configs = self.configs.lock().map_err(|e| {
86      syntastica::Error::UnsupportedLanguage(format!(
87        "syntax language-set lock poisoned: {e}"
88      ))
89    })?;
90
91    let config = configs.get(&lang).copied().unwrap_or_else(|| {
92      let config: &'static HighlightConfiguration = Box::leak(Box::new(config));
93      configs.insert(lang, config);
94      config
95    });
96    drop(configs);
97
98    Ok(config)
99  }
100}
101
102fn build_highlight_config(
103  lang: Lang,
104  syntax_queries_dir: Option<&Path>,
105) -> SyntaxResult<HighlightConfiguration> {
106  let mut highlights_query = lang.highlights_query().to_string();
107  let mut injections_query = lang.injections_query().to_string();
108  let mut locals_query = lang.locals_query().to_string();
109
110  if let Some(base_dir) = syntax_queries_dir {
111    if let Some(query) = read_user_query(base_dir, lang, "highlights.scm")? {
112      let extends = is_extends_query(&query);
113      let processed =
114        process_highlights("", true, &rewrite_any_of_predicates(&query));
115      if extends {
116        highlights_query = format!("{highlights_query}\n{processed}");
117      } else {
118        highlights_query = processed;
119      }
120    }
121
122    if let Some(query) = read_user_query(base_dir, lang, "injections.scm")? {
123      let extends = is_extends_query(&query);
124      let processed =
125        process_injections("", true, &rewrite_any_of_predicates(&query));
126      if extends {
127        injections_query = format!("{injections_query}\n{processed}");
128      } else {
129        injections_query = processed;
130      }
131    }
132
133    if let Some(query) = read_user_query(base_dir, lang, "locals.scm")? {
134      let extends = is_extends_query(&query);
135      let processed =
136        process_locals("", true, &rewrite_any_of_predicates(&query));
137      if extends {
138        locals_query = format!("{locals_query}\n{processed}");
139      } else {
140        locals_query = processed;
141      }
142    }
143  }
144
145  let mut config = HighlightConfiguration::new(
146    lang.get(),
147    <&str>::from(lang),
148    &highlights_query,
149    &injections_query,
150    &locals_query,
151  )
152  .map_err(|e| {
153    SyntaxError::BackendError(format!(
154      "failed to build highlight config for '{}': {e}",
155      <&str>::from(lang)
156    ))
157  })?;
158  config.configure(syntastica::theme::THEME_KEYS);
159  Ok(config)
160}
161
162impl LanguageSet<'_> for UserQueryLanguageSet {
163  type Language = Lang;
164
165  fn get_language(
166    &self,
167    language: Self::Language,
168  ) -> syntastica::Result<&HighlightConfiguration> {
169    self.config_for(language)
170  }
171}
172
/// Reports whether the first line of `content`, ignoring surrounding
/// whitespace, is exactly the `;; extends` (or `;;extends`) directive.
///
/// Only the very first line is inspected; empty input yields `false`.
fn is_extends_query(content: &str) -> bool {
  let Some(first_line) = content.lines().next() else {
    return false;
  };

  let directive = first_line.trim();
  directive == ";; extends" || directive == ";;extends"
}
179
180/// Rewrites `(#any-of? @cap "a" "b" ...)` into `(#match? @cap "^(a|b|...)$")`.
181///
182/// nvim-treesitter's `#any-of?` is a Lua-backed predicate with no standard
183/// tree-sitter equivalent. The rewrite preserves the same semantics using the
184/// `#match?` predicate that tree-sitter-highlight natively supports.
185fn rewrite_any_of_predicates(query: &str) -> String {
186  const NEEDLE: &str = "#any-of?";
187  let mut result = String::with_capacity(query.len());
188  let mut remaining = query;
189
190  loop {
191    match remaining.find(NEEDLE) {
192      None => {
193        result.push_str(remaining);
194        break;
195      },
196      Some(pos) => {
197        result.push_str(&remaining[..pos]);
198        let from = &remaining[pos..];
199        if let Some((replacement, consumed)) = parse_any_of_predicate(from) {
200          result.push_str(&replacement);
201          remaining = &from[consumed..];
202        } else {
203          result.push_str(NEEDLE);
204          remaining = &from[NEEDLE.len()..];
205        }
206      },
207    }
208  }
209
210  result
211}
212
213fn parse_any_of_predicate(s: &str) -> Option<(String, usize)> {
214  const NEEDLE: &str = "#any-of?";
215  let mut pos = NEEDLE.len();
216
217  let skip_ws = |p: usize| p + s[p..].len() - s[p..].trim_start().len();
218
219  pos = skip_ws(pos);
220
221  if !s[pos..].starts_with('@') {
222    return None;
223  }
224
225  let cap_start = pos;
226  pos += 1;
227  while pos < s.len() {
228    let b = s.as_bytes()[pos];
229    if b.is_ascii_whitespace() || b == b')' {
230      break;
231    }
232    pos += 1;
233  }
234  let capture_name = &s[cap_start..pos];
235
236  pos = skip_ws(pos);
237
238  let mut values: Vec<&str> = Vec::new();
239  while pos < s.len() && s.as_bytes()[pos] == b'"' {
240    pos += 1;
241    let val_start = pos;
242    while pos < s.len() && s.as_bytes()[pos] != b'"' {
243      if s.as_bytes()[pos] == b'\\' {
244        pos += 1;
245      }
246      pos += 1;
247    }
248    if pos >= s.len() {
249      return None;
250    }
251    values.push(&s[val_start..pos]);
252    pos += 1;
253    pos = skip_ws(pos);
254  }
255
256  if values.is_empty() {
257    return None;
258  }
259
260  let pattern = format!(
261    "^({})$",
262    values
263      .iter()
264      .map(|v| ts_regex_escape(v))
265      .collect::<Vec<_>>()
266      .join("|")
267  );
268  Some((format!("#match? {capture_name} \"{pattern}\""), pos))
269}
270
/// Returns `s` with a backslash inserted before each regex metacharacter
/// (`. * + ? ^ $ { } [ ] | ( ) \`); every other character is copied as is.
fn ts_regex_escape(s: &str) -> String {
  // Characters that receive a leading backslash.
  const SPECIAL: &str = r".*+?^${}[]|()\";

  s.chars()
    .fold(String::with_capacity(s.len()), |mut escaped, c| {
      if SPECIAL.contains(c) {
        escaped.push('\\');
      }
      escaped.push(c);
      escaped
    })
}
297
298fn read_user_query(
299  base_dir: &Path,
300  lang: Lang,
301  file_name: &str,
302) -> SyntaxResult<Option<String>> {
303  let query_path = query_path_for_lang(base_dir, lang, file_name);
304  if !query_path.exists() {
305    return Ok(None);
306  }
307
308  fs::read_to_string(&query_path).map(Some).map_err(|e| {
309    SyntaxError::BackendError(format!(
310      "failed to read query override '{}': {e}",
311      query_path.display()
312    ))
313  })
314}
315
316fn query_path_for_lang(
317  base_dir: &Path,
318  lang: Lang,
319  file_name: &str,
320) -> PathBuf {
321  base_dir.join(<&str>::from(lang)).join(file_name)
322}
323
324impl SyntasticaHighlighter {
325  /// Create a new Syntastica highlighter with all available themes.
326  ///
327  /// # Errors
328  ///
329  /// Currently never returns an error, but returns a Result for API
330  /// consistency.
331  pub fn new(syntax_queries_dir: Option<&Path>) -> SyntaxResult<Self> {
332    let mut themes = HashMap::new();
333
334    // Load all available themes
335    for theme_name in syntastica_themes::THEMES {
336      if let Some(theme) = syntastica_themes::from_str(theme_name) {
337        themes.insert((*theme_name).to_string(), theme);
338      }
339    }
340
341    let default_theme = syntastica_themes::one::dark();
342
343    // Leak the language set into a `'static` reference so the `Processor` can
344    // hold it for the remainder of the process lifetime. This is sound for a
345    // CLI: the process exits when documentation generation completes and the OS
346    // reclaims the memory. It avoids the unsound lifetime fabrication that a
347    // raw-pointer cast would require.
348    let language_set_static: &'static UserQueryLanguageSet =
349      Box::leak(Box::new(UserQueryLanguageSet::new(syntax_queries_dir)));
350    let processor = Processor::new(language_set_static);
351
352    Ok(Self {
353      themes,
354      default_theme,
355      processor: Mutex::new(processor),
356      renderer: Mutex::new(HtmlRenderer::new()),
357    })
358  }
359
360  /// Add a custom theme
361  pub fn add_theme(&mut self, name: String, theme: ResolvedTheme) {
362    self.themes.insert(name, theme);
363  }
364
365  /// Set the default theme
366  pub fn set_default_theme(&mut self, theme: ResolvedTheme) {
367    self.default_theme = theme;
368  }
369
370  /// Convert a language string to a Lang enum
371  fn parse_language(language: &str) -> Option<Lang> {
372    match language.to_lowercase().as_str() {
373      "rust" | "rs" => Some(Lang::Rust),
374      "python" | "py" => Some(Lang::Python),
375      "javascript" | "js" => Some(Lang::Javascript),
376      "typescript" | "ts" => Some(Lang::Typescript),
377      "tsx" => Some(Lang::Tsx),
378      "nix" => Some(Lang::Nix),
379      "bash" | "sh" | "shell" => Some(Lang::Bash),
380      "c" => Some(Lang::C),
381      "cpp" | "c++" | "cxx" => Some(Lang::Cpp),
382      "c_sharp" | "csharp" | "cs" => Some(Lang::CSharp),
383      "go" => Some(Lang::Go),
384      "java" => Some(Lang::Java),
385      "json" => Some(Lang::Json),
386      "yaml" | "yml" => Some(Lang::Yaml),
387      "html" => Some(Lang::Html),
388      "css" => Some(Lang::Css),
389      "markdown" | "md" => Some(Lang::Markdown),
390      "markdown_inline" => Some(Lang::MarkdownInline),
391      "sql" => Some(Lang::Sql),
392      "lua" => Some(Lang::Lua),
393      "ruby" | "rb" => Some(Lang::Ruby),
394      "php" => Some(Lang::Php),
395      "php_only" => Some(Lang::PhpOnly),
396      "haskell" | "hs" => Some(Lang::Haskell),
397      "scala" => Some(Lang::Scala),
398      "swift" => Some(Lang::Swift),
399      "makefile" | "make" => Some(Lang::Make),
400      "cmake" => Some(Lang::Cmake),
401      "asm" | "assembly" => Some(Lang::Asm),
402      "diff" | "patch" => Some(Lang::Diff),
403      "elixir" | "ex" | "exs" => Some(Lang::Elixir),
404      "jsdoc" => Some(Lang::Jsdoc),
405      "printf" => Some(Lang::Printf),
406      "regex" | "regexp" => Some(Lang::Regex),
407      "zig" => Some(Lang::Zig),
408      #[allow(clippy::match_same_arms, reason = "Explicit for documentation")]
409      "text" | "txt" | "plain" => None, // use fallback for plain text
410      _ => None,
411    }
412  }
413
414  /// Get the theme by name, falling back to default
415  fn get_theme(&self, theme_name: Option<&str>) -> &ResolvedTheme {
416    theme_name
417      .and_then(|name| self.themes.get(name))
418      .unwrap_or(&self.default_theme)
419  }
420}
421
422impl SyntaxHighlighter for SyntasticaHighlighter {
423  fn name(&self) -> &'static str {
424    "Syntastica"
425  }
426
427  fn supported_languages(&self) -> Vec<String> {
428    vec![
429      "rust",
430      "rs",
431      "python",
432      "py",
433      "javascript",
434      "js",
435      "typescript",
436      "ts",
437      "tsx",
438      "nix",
439      "bash",
440      "sh",
441      "shell",
442      "c",
443      "cpp",
444      "c++",
445      "cxx",
446      "c_sharp",
447      "csharp",
448      "cs",
449      "go",
450      "java",
451      "json",
452      "yaml",
453      "yml",
454      "html",
455      "css",
456      "markdown",
457      "md",
458      "markdown_inline",
459      "sql",
460      "lua",
461      "ruby",
462      "rb",
463      "php",
464      "php_only",
465      "haskell",
466      "hs",
467      "scala",
468      "swift",
469      "makefile",
470      "make",
471      "cmake",
472      "asm",
473      "assembly",
474      "diff",
475      "patch",
476      "elixir",
477      "ex",
478      "exs",
479      "jsdoc",
480      "printf",
481      "regex",
482      "regexp",
483      "zig",
484      "text",
485      "txt",
486      "plain",
487    ]
488    .into_iter()
489    .map(String::from)
490    .collect()
491  }
492
493  fn available_themes(&self) -> Vec<String> {
494    let mut themes: Vec<String> = self.themes.keys().cloned().collect();
495    themes.sort();
496    themes
497  }
498
499  fn highlight(
500    &self,
501    code: &str,
502    language: &str,
503    theme: Option<&str>,
504  ) -> SyntaxResult<String> {
505    let lang = Self::parse_language(language)
506      .ok_or_else(|| SyntaxError::UnsupportedLanguage(language.to_string()))?;
507
508    let theme = self.get_theme(theme);
509
510    // Use the reusable processor via Mutex for thread-safe interior mutability
511    let highlights = self
512      .processor
513      .lock()
514      .map_err(|e| {
515        SyntaxError::HighlightingFailed(format!("Processor lock poisoned: {e}"))
516      })?
517      .process(code, lang)
518      .map_err(|e| SyntaxError::HighlightingFailed(e.to_string()))?;
519
520    // Use the reusable renderer via Mutex for thread-safe interior mutability
521    let html = {
522      let mut renderer = self.renderer.lock().map_err(|e| {
523        SyntaxError::HighlightingFailed(format!("Renderer lock poisoned: {e}"))
524      })?;
525      render(&highlights, &mut *renderer, theme)
526    };
527
528    Ok(html)
529  }
530
531  fn language_from_extension(&self, extension: &str) -> Option<String> {
532    match extension.to_lowercase().as_str() {
533      "rs" => Some("rust".to_string()),
534      "py" | "pyw" => Some("python".to_string()),
535      "js" | "mjs" => Some("javascript".to_string()),
536      "ts" => Some("typescript".to_string()),
537      "tsx" => Some("tsx".to_string()),
538      "nix" => Some("nix".to_string()),
539      "sh" | "bash" | "zsh" | "fish" => Some("bash".to_string()),
540      "c" | "h" => Some("c".to_string()),
541      "cpp" | "cxx" | "cc" | "hpp" | "hxx" | "hh" => Some("cpp".to_string()),
542      "cs" => Some("c_sharp".to_string()),
543      "go" => Some("go".to_string()),
544      "java" => Some("java".to_string()),
545      "json" => Some("json".to_string()),
546      "yaml" | "yml" => Some("yaml".to_string()),
547      "html" | "htm" => Some("html".to_string()),
548      "css" => Some("css".to_string()),
549      "md" | "markdown" => Some("markdown".to_string()),
550      "sql" => Some("sql".to_string()),
551      "lua" => Some("lua".to_string()),
552      "rb" => Some("ruby".to_string()),
553      "php" => Some("php".to_string()),
554      "hs" => Some("haskell".to_string()),
555      "ml" | "mli" => Some("ocaml".to_string()),
556      "scala" => Some("scala".to_string()),
557      "swift" => Some("swift".to_string()),
558      "s" | "asm" => Some("asm".to_string()),
559      "diff" | "patch" => Some("diff".to_string()),
560      "ex" | "exs" => Some("elixir".to_string()),
561      "zig" => Some("zig".to_string()),
562      "txt" => Some("text".to_string()),
563      _ => None,
564    }
565  }
566}
567
568/// Create a Syntastica-based syntax manager with default configuration.
569///
570/// Syntastica provides modern tree-sitter based syntax highlighting with
571/// excellent language support including native Nix highlighting.
572///
573/// # Errors
574///
575/// Returns an error if the Syntastica highlighter fails to initialize.
576pub fn create_syntastica_manager(
577  syntax_queries_dir: Option<&Path>,
578) -> SyntaxResult<SyntaxManager> {
579  let highlighter = Box::new(SyntasticaHighlighter::new(syntax_queries_dir)?);
580  let config = SyntaxConfig {
581    default_theme: Some("one-dark".to_string()),
582    ..Default::default()
583  };
584  Ok(SyntaxManager::new(highlighter, config))
585}
586
// Unit tests for the extends-directive detection and the `#any-of?` to
// `#match?` rewriting helpers.
#[cfg(test)]
mod tests {
  use super::*;

  // The directive is recognised with or without a space after `;;`, and only
  // in that exact form.
  #[test]
  fn test_is_extends_query() {
    assert!(is_extends_query(";; extends\n(foo) @bar"));
    assert!(is_extends_query(";;extends\n(foo) @bar"));
    assert!(!is_extends_query("(foo) @bar"));
    assert!(!is_extends_query(""));
    assert!(!is_extends_query("; extends")); // single semicolon is a comment, not the directive
  }

  // A single predicate is rewritten into an anchored alternation and the
  // capture name is kept.
  #[test]
  fn test_rewrite_any_of_basic() {
    let input = r#"((identifier) @_name (#any-of? @_name "foo" "bar"))"#;
    let output = rewrite_any_of_predicates(input);
    assert!(output.contains("#match?"));
    assert!(output.contains("@_name"));
    assert!(output.contains("^(foo|bar)$"));
    assert!(!output.contains("#any-of?"));
  }

  // Every occurrence in the query is rewritten, each with its own values.
  #[test]
  fn test_rewrite_any_of_multiple() {
    let input = r#"
      ((identifier) @a (#any-of? @a "x" "y"))
      ((identifier) @b (#any-of? @b "p" "q" "r"))
    "#;
    let output = rewrite_any_of_predicates(input);
    assert_eq!(output.matches("#match?").count(), 2);
    assert!(!output.contains("#any-of?"));
    assert!(output.contains("^(x|y)$"));
    assert!(output.contains("^(p|q|r)$"));
  }

  // Regex metacharacters inside a value are backslash-escaped in the pattern.
  #[test]
  fn test_rewrite_any_of_regex_escaping() {
    let input = r#"((identifier) @a (#any-of? @a "foo.bar" "baz"))"#;
    let output = rewrite_any_of_predicates(input);
    assert!(output.contains("foo\\.bar"));
  }

  // Queries without `#any-of?` come back unchanged.
  #[test]
  fn test_rewrite_any_of_no_match_passthrough() {
    let input = "(foo) @bar (#eq? @bar \"baz\")";
    let output = rewrite_any_of_predicates(input);
    assert_eq!(input, output);
  }

  // Multi-line injection query: only the `#any-of?` predicate is rewritten,
  // the surrounding text is preserved.
  #[test]
  fn test_rewrite_any_of_nvf_nix_query() {
    // Matches the actual query from nvf's nix.nix
    let input = r#"
;; extends

((apply_expression
  function: (variable_expression
    name: (identifier) @_func
    (#any-of? @_func "mkLuaInline" "entryAnywhere"))
  argument: (indented_string_expression
    (string_fragment) @injection.content))
(#set! injection.language "lua")
(#set! injection.combined))
"#;
    let output = rewrite_any_of_predicates(input);
    assert!(!output.contains("#any-of?"));
    assert!(
      output.contains("#match? @_func \"^(mkLuaInline|entryAnywhere)$\"")
    );
    // Non-any-of predicates must be preserved
    assert!(output.contains("#set! injection.language"));
    assert!(output.contains("#set! injection.combined"));
    assert!(output.contains(";; extends"));
  }
}