Skip to main content

ndg_commonmark/syntax/
syntastica.rs

//! Syntastica-based syntax highlighting backend.
//!
//! This module provides a modern tree-sitter based syntax highlighter using the
//! Syntastica library, which offers excellent language support including native
//! Nix highlighting.
//!
//! ## Theme Support
//!
//! We programmatically load all available themes from `syntastica-themes`.
//! Some of the popular themes included are:
//!
//! - github (dark/light variants)
//! - gruvbox (dark/light)
//! - nord, dracula, catppuccin
//! - tokyo night, solarized, monokai
//! - And many more...
17
18use std::{
19  collections::HashMap,
20  fs,
21  path::{Path, PathBuf},
22  sync::Mutex,
23};
24
25use syntastica::{
26  Processor,
27  language_set::{HighlightConfiguration, LanguageSet},
28  render,
29  renderer::HtmlRenderer,
30};
31use syntastica_core::theme::ResolvedTheme;
32use syntastica_parsers::{LANGUAGES, Lang};
33use syntastica_query_preprocessor::{
34  process_highlights,
35  process_injections,
36  process_locals,
37};
38
39use super::{
40  error::{SyntaxError, SyntaxResult},
41  types::{SyntaxConfig, SyntaxHighlighter, SyntaxManager},
42};
43
44/// Syntastica-based syntax highlighter.
45pub struct SyntasticaHighlighter {
46  themes:        HashMap<String, ResolvedTheme>,
47  default_theme: ResolvedTheme,
48  processor:     Mutex<Processor<'static, UserQueryLanguageSet>>,
49  renderer:      Mutex<HtmlRenderer>,
50}
51
52struct UserQueryLanguageSet {
53  configs: HashMap<Lang, HighlightConfiguration>,
54}
55
56impl UserQueryLanguageSet {
57  fn new(syntax_queries_dir: Option<&Path>) -> SyntaxResult<Self> {
58    let mut configs = HashMap::new();
59
60    for &lang in LANGUAGES {
61      let mut highlights_query = lang.highlights_query().to_string();
62      let mut injections_query = lang.injections_query().to_string();
63      let mut locals_query = lang.locals_query().to_string();
64
65      if let Some(base_dir) = syntax_queries_dir {
66        if let Some(query) = read_user_query(base_dir, lang, "highlights.scm")?
67        {
68          let extends = is_extends_query(&query);
69          let processed =
70            process_highlights("", true, &rewrite_any_of_predicates(&query));
71          if extends {
72            highlights_query = format!("{highlights_query}\n{processed}");
73          } else {
74            highlights_query = processed;
75          }
76        }
77
78        if let Some(query) = read_user_query(base_dir, lang, "injections.scm")?
79        {
80          let extends = is_extends_query(&query);
81          let processed =
82            process_injections("", true, &rewrite_any_of_predicates(&query));
83          if extends {
84            injections_query = format!("{injections_query}\n{processed}");
85          } else {
86            injections_query = processed;
87          }
88        }
89
90        if let Some(query) = read_user_query(base_dir, lang, "locals.scm")? {
91          let extends = is_extends_query(&query);
92          let processed =
93            process_locals("", true, &rewrite_any_of_predicates(&query));
94          if extends {
95            locals_query = format!("{locals_query}\n{processed}");
96          } else {
97            locals_query = processed;
98          }
99        }
100      }
101
102      let mut config = HighlightConfiguration::new(
103        lang.get(),
104        <&str>::from(lang),
105        &highlights_query,
106        &injections_query,
107        &locals_query,
108      )
109      .map_err(|e| {
110        SyntaxError::BackendError(format!(
111          "failed to build highlight config for '{}': {e}",
112          <&str>::from(lang)
113        ))
114      })?;
115      config.configure(syntastica::theme::THEME_KEYS);
116      configs.insert(lang, config);
117    }
118
119    Ok(Self { configs })
120  }
121}
122
123impl<'s> LanguageSet<'s> for UserQueryLanguageSet {
124  type Language = Lang;
125
126  fn get_language(
127    &self,
128    language: Self::Language,
129  ) -> syntastica::Result<&HighlightConfiguration> {
130    self.configs.get(&language).ok_or_else(|| {
131      syntastica::Error::UnsupportedLanguage(<&str>::from(language).to_string())
132    })
133  }
134}
135
/// Reports whether a query file opts into extending the bundled query.
///
/// Only the first line is inspected: after trimming surrounding whitespace it
/// must be exactly `;; extends` or `;;extends`. Empty content, or any other
/// first line (including a single-semicolon `; extends`), yields `false`.
fn is_extends_query(content: &str) -> bool {
  let Some(first_line) = content.lines().next() else {
    return false;
  };
  let directive = first_line.trim();
  directive == ";; extends" || directive == ";;extends"
}
143
144/// Rewrites `(#any-of? @cap "a" "b" ...)` into `(#match? @cap "^(a|b|...)$")`.
145///
146/// nvim-treesitter's `#any-of?` is a Lua-backed predicate with no standard
147/// tree-sitter equivalent. The rewrite preserves the same semantics using the
148/// `#match?` predicate that tree-sitter-highlight natively supports.
149fn rewrite_any_of_predicates(query: &str) -> String {
150  const NEEDLE: &str = "#any-of?";
151  let mut result = String::with_capacity(query.len());
152  let mut remaining = query;
153
154  loop {
155    match remaining.find(NEEDLE) {
156      None => {
157        result.push_str(remaining);
158        break;
159      },
160      Some(pos) => {
161        result.push_str(&remaining[..pos]);
162        let from = &remaining[pos..];
163        match parse_any_of_predicate(from) {
164          Some((replacement, consumed)) => {
165            result.push_str(&replacement);
166            remaining = &from[consumed..];
167          },
168          None => {
169            result.push_str(NEEDLE);
170            remaining = &from[NEEDLE.len()..];
171          },
172        }
173      },
174    }
175  }
176
177  result
178}
179
180fn parse_any_of_predicate(s: &str) -> Option<(String, usize)> {
181  const NEEDLE: &str = "#any-of?";
182  let mut pos = NEEDLE.len();
183
184  let skip_ws = |p: usize| p + s[p..].len() - s[p..].trim_start().len();
185
186  pos = skip_ws(pos);
187
188  if !s[pos..].starts_with('@') {
189    return None;
190  }
191
192  let cap_start = pos;
193  pos += 1;
194  while pos < s.len() {
195    let b = s.as_bytes()[pos];
196    if b.is_ascii_whitespace() || b == b')' {
197      break;
198    }
199    pos += 1;
200  }
201  let capture_name = &s[cap_start..pos];
202
203  pos = skip_ws(pos);
204
205  let mut values: Vec<&str> = Vec::new();
206  while pos < s.len() && s.as_bytes()[pos] == b'"' {
207    pos += 1;
208    let val_start = pos;
209    while pos < s.len() && s.as_bytes()[pos] != b'"' {
210      if s.as_bytes()[pos] == b'\\' {
211        pos += 1;
212      }
213      pos += 1;
214    }
215    if pos >= s.len() {
216      return None;
217    }
218    values.push(&s[val_start..pos]);
219    pos += 1;
220    pos = skip_ws(pos);
221  }
222
223  if values.is_empty() {
224    return None;
225  }
226
227  let pattern = format!(
228    "^({})$",
229    values
230      .iter()
231      .map(|v| ts_regex_escape(v))
232      .collect::<Vec<_>>()
233      .join("|")
234  );
235  Some((format!("#match? {capture_name} \"{pattern}\""), pos))
236}
237
/// Escapes the raw text of one `#any-of?` string value so it can be used as a
/// literal alternative inside the generated `#match?` pattern.
///
/// Every regex metacharacter (``. * + ? ^ $ { } [ ] | ( ) \``) is prefixed
/// with a backslash. The input is the value exactly as written between the
/// quotes of the query, so it may itself contain backslash escapes; these are
/// consumed as pairs. An escaped quote (`\"`) is kept as-is: prefixing its
/// backslash with another one would leave the quote unescaped and terminate
/// the surrounding string literal of the rewritten predicate early.
///
/// NOTE(review): the result is embedded in a query string literal. If the
/// query parser un-escapes sequences such as `\.` before the regex is
/// compiled, metacharacter escapes would have to be doubled to stay literal —
/// confirm against the tree-sitter query string syntax.
fn ts_regex_escape(s: &str) -> String {
  // Characters that carry special meaning in a regular expression.
  fn is_regex_meta(c: char) -> bool {
    matches!(
      c,
      '.'
        | '*'
        | '+'
        | '?'
        | '^'
        | '$'
        | '{'
        | '}'
        | '['
        | ']'
        | '|'
        | '('
        | ')'
        | '\\'
    )
  }

  let mut out = String::with_capacity(s.len());
  let mut chars = s.chars();
  while let Some(c) = chars.next() {
    if c != '\\' {
      if is_regex_meta(c) {
        out.push('\\');
      }
      out.push(c);
      continue;
    }

    // `c` starts an escape pair; decide based on the escaped character.
    match chars.next() {
      // Keep the quote escaped so the generated literal stays well-formed.
      Some('"') => out.push_str("\\\""),
      Some(escaped) => {
        out.push_str("\\\\");
        if is_regex_meta(escaped) {
          out.push('\\');
        }
        out.push(escaped);
      },
      // Lone trailing backslash.
      None => out.push_str("\\\\"),
    }
  }
  out
}
264
265fn read_user_query(
266  base_dir: &Path,
267  lang: Lang,
268  file_name: &str,
269) -> SyntaxResult<Option<String>> {
270  let query_path = query_path_for_lang(base_dir, lang, file_name);
271  if !query_path.exists() {
272    return Ok(None);
273  }
274
275  fs::read_to_string(&query_path).map(Some).map_err(|e| {
276    SyntaxError::BackendError(format!(
277      "failed to read query override '{}': {e}",
278      query_path.display()
279    ))
280  })
281}
282
283fn query_path_for_lang(
284  base_dir: &Path,
285  lang: Lang,
286  file_name: &str,
287) -> PathBuf {
288  base_dir.join(<&str>::from(lang)).join(file_name)
289}
290
291impl SyntasticaHighlighter {
292  /// Create a new Syntastica highlighter with all available themes.
293  ///
294  /// # Errors
295  ///
296  /// Currently never returns an error, but returns a Result for API
297  /// consistency.
298  pub fn new(syntax_queries_dir: Option<&Path>) -> SyntaxResult<Self> {
299    let mut themes = HashMap::new();
300
301    // Load all available themes
302    for theme_name in syntastica_themes::THEMES {
303      if let Some(theme) = syntastica_themes::from_str(theme_name) {
304        themes.insert((*theme_name).to_string(), theme);
305      }
306    }
307
308    let default_theme = syntastica_themes::one::dark();
309
310    // Leak the language set into a `'static` reference so the `Processor` can
311    // hold it for the remainder of the process lifetime. This is sound for a
312    // CLI: the process exits when documentation generation completes and the OS
313    // reclaims the memory. It avoids the unsound lifetime fabrication that a
314    // raw-pointer cast would require.
315    let language_set_static: &'static UserQueryLanguageSet =
316      Box::leak(Box::new(UserQueryLanguageSet::new(syntax_queries_dir)?));
317    let processor = Processor::new(language_set_static);
318
319    Ok(Self {
320      themes,
321      default_theme,
322      processor: Mutex::new(processor),
323      renderer: Mutex::new(HtmlRenderer::new()),
324    })
325  }
326
327  /// Add a custom theme
328  pub fn add_theme(&mut self, name: String, theme: ResolvedTheme) {
329    self.themes.insert(name, theme);
330  }
331
332  /// Set the default theme
333  pub fn set_default_theme(&mut self, theme: ResolvedTheme) {
334    self.default_theme = theme;
335  }
336
337  /// Convert a language string to a Lang enum
338  fn parse_language(language: &str) -> Option<Lang> {
339    match language.to_lowercase().as_str() {
340      "rust" | "rs" => Some(Lang::Rust),
341      "python" | "py" => Some(Lang::Python),
342      "javascript" | "js" => Some(Lang::Javascript),
343      "typescript" | "ts" => Some(Lang::Typescript),
344      "tsx" => Some(Lang::Tsx),
345      "nix" => Some(Lang::Nix),
346      "bash" | "sh" | "shell" => Some(Lang::Bash),
347      "c" => Some(Lang::C),
348      "cpp" | "c++" | "cxx" => Some(Lang::Cpp),
349      "c_sharp" | "csharp" | "cs" => Some(Lang::CSharp),
350      "go" => Some(Lang::Go),
351      "java" => Some(Lang::Java),
352      "json" => Some(Lang::Json),
353      "yaml" | "yml" => Some(Lang::Yaml),
354      "html" => Some(Lang::Html),
355      "css" => Some(Lang::Css),
356      "markdown" | "md" => Some(Lang::Markdown),
357      "markdown_inline" => Some(Lang::MarkdownInline),
358      "sql" => Some(Lang::Sql),
359      "lua" => Some(Lang::Lua),
360      "ruby" | "rb" => Some(Lang::Ruby),
361      "php" => Some(Lang::Php),
362      "php_only" => Some(Lang::PhpOnly),
363      "haskell" | "hs" => Some(Lang::Haskell),
364      "scala" => Some(Lang::Scala),
365      "swift" => Some(Lang::Swift),
366      "makefile" | "make" => Some(Lang::Make),
367      "cmake" => Some(Lang::Cmake),
368      "asm" | "assembly" => Some(Lang::Asm),
369      "diff" | "patch" => Some(Lang::Diff),
370      "elixir" | "ex" | "exs" => Some(Lang::Elixir),
371      "jsdoc" => Some(Lang::Jsdoc),
372      "printf" => Some(Lang::Printf),
373      "regex" | "regexp" => Some(Lang::Regex),
374      "zig" => Some(Lang::Zig),
375      #[allow(clippy::match_same_arms, reason = "Explicit for documentation")]
376      "text" | "txt" | "plain" => None, // use fallback for plain text
377      _ => None,
378    }
379  }
380
381  /// Get the theme by name, falling back to default
382  fn get_theme(&self, theme_name: Option<&str>) -> &ResolvedTheme {
383    theme_name
384      .and_then(|name| self.themes.get(name))
385      .unwrap_or(&self.default_theme)
386  }
387}
388
389impl SyntaxHighlighter for SyntasticaHighlighter {
390  fn name(&self) -> &'static str {
391    "Syntastica"
392  }
393
394  fn supported_languages(&self) -> Vec<String> {
395    vec![
396      "rust",
397      "rs",
398      "python",
399      "py",
400      "javascript",
401      "js",
402      "typescript",
403      "ts",
404      "tsx",
405      "nix",
406      "bash",
407      "sh",
408      "shell",
409      "c",
410      "cpp",
411      "c++",
412      "cxx",
413      "c_sharp",
414      "csharp",
415      "cs",
416      "go",
417      "java",
418      "json",
419      "yaml",
420      "yml",
421      "html",
422      "css",
423      "markdown",
424      "md",
425      "markdown_inline",
426      "sql",
427      "lua",
428      "ruby",
429      "rb",
430      "php",
431      "php_only",
432      "haskell",
433      "hs",
434      "scala",
435      "swift",
436      "makefile",
437      "make",
438      "cmake",
439      "asm",
440      "assembly",
441      "diff",
442      "patch",
443      "elixir",
444      "ex",
445      "exs",
446      "jsdoc",
447      "printf",
448      "regex",
449      "regexp",
450      "zig",
451      "text",
452      "txt",
453      "plain",
454    ]
455    .into_iter()
456    .map(String::from)
457    .collect()
458  }
459
460  fn available_themes(&self) -> Vec<String> {
461    let mut themes: Vec<String> = self.themes.keys().cloned().collect();
462    themes.sort();
463    themes
464  }
465
466  fn highlight(
467    &self,
468    code: &str,
469    language: &str,
470    theme: Option<&str>,
471  ) -> SyntaxResult<String> {
472    let lang = Self::parse_language(language)
473      .ok_or_else(|| SyntaxError::UnsupportedLanguage(language.to_string()))?;
474
475    let theme = self.get_theme(theme);
476
477    // Use the reusable processor via Mutex for thread-safe interior mutability
478    let highlights = self
479      .processor
480      .lock()
481      .map_err(|e| {
482        SyntaxError::HighlightingFailed(format!("Processor lock poisoned: {e}"))
483      })?
484      .process(code, lang)
485      .map_err(|e| SyntaxError::HighlightingFailed(e.to_string()))?;
486
487    // Use the reusable renderer via Mutex for thread-safe interior mutability
488    let html = {
489      let mut renderer = self.renderer.lock().map_err(|e| {
490        SyntaxError::HighlightingFailed(format!("Renderer lock poisoned: {e}"))
491      })?;
492      render(&highlights, &mut *renderer, theme)
493    };
494
495    Ok(html)
496  }
497
498  fn language_from_extension(&self, extension: &str) -> Option<String> {
499    match extension.to_lowercase().as_str() {
500      "rs" => Some("rust".to_string()),
501      "py" | "pyw" => Some("python".to_string()),
502      "js" | "mjs" => Some("javascript".to_string()),
503      "ts" => Some("typescript".to_string()),
504      "tsx" => Some("tsx".to_string()),
505      "nix" => Some("nix".to_string()),
506      "sh" | "bash" | "zsh" | "fish" => Some("bash".to_string()),
507      "c" | "h" => Some("c".to_string()),
508      "cpp" | "cxx" | "cc" | "hpp" | "hxx" | "hh" => Some("cpp".to_string()),
509      "cs" => Some("c_sharp".to_string()),
510      "go" => Some("go".to_string()),
511      "java" => Some("java".to_string()),
512      "json" => Some("json".to_string()),
513      "yaml" | "yml" => Some("yaml".to_string()),
514      "html" | "htm" => Some("html".to_string()),
515      "css" => Some("css".to_string()),
516      "md" | "markdown" => Some("markdown".to_string()),
517      "sql" => Some("sql".to_string()),
518      "lua" => Some("lua".to_string()),
519      "rb" => Some("ruby".to_string()),
520      "php" => Some("php".to_string()),
521      "hs" => Some("haskell".to_string()),
522      "ml" | "mli" => Some("ocaml".to_string()),
523      "scala" => Some("scala".to_string()),
524      "swift" => Some("swift".to_string()),
525      "s" | "asm" => Some("asm".to_string()),
526      "diff" | "patch" => Some("diff".to_string()),
527      "ex" | "exs" => Some("elixir".to_string()),
528      "zig" => Some("zig".to_string()),
529      "txt" => Some("text".to_string()),
530      _ => None,
531    }
532  }
533}
534
535/// Create a Syntastica-based syntax manager with default configuration.
536///
537/// Syntastica provides modern tree-sitter based syntax highlighting with
538/// excellent language support including native Nix highlighting.
539///
540/// # Errors
541///
542/// Returns an error if the Syntastica highlighter fails to initialize.
543pub fn create_syntastica_manager(
544  syntax_queries_dir: Option<&Path>,
545) -> SyntaxResult<SyntaxManager> {
546  let highlighter = Box::new(SyntasticaHighlighter::new(syntax_queries_dir)?);
547  let config = SyntaxConfig {
548    default_theme: Some("one-dark".to_string()),
549    ..Default::default()
550  };
551  Ok(SyntaxManager::new(highlighter, config))
552}
553
#[cfg(test)]
mod tests {
  use super::*;

  /// The directive is only recognized on the first line and only with two
  /// semicolons.
  #[test]
  fn test_is_extends_query() {
    assert!(is_extends_query(";; extends\n(foo) @bar"));
    assert!(is_extends_query(";;extends\n(foo) @bar"));
    assert!(!is_extends_query("(foo) @bar"));
    assert!(!is_extends_query(""));
    // A single semicolon is an ordinary comment, not the directive.
    assert!(!is_extends_query("; extends"));
  }

  /// A single predicate becomes an anchored alternation.
  #[test]
  fn test_rewrite_any_of_basic() {
    let query = r#"((identifier) @_name (#any-of? @_name "foo" "bar"))"#;
    let rewritten = rewrite_any_of_predicates(query);
    assert!(rewritten.contains("#match?"));
    assert!(rewritten.contains("@_name"));
    assert!(rewritten.contains("^(foo|bar)$"));
    assert!(!rewritten.contains("#any-of?"));
  }

  /// Every predicate in the query is rewritten, not just the first one.
  #[test]
  fn test_rewrite_any_of_multiple() {
    let query = r#"
      ((identifier) @a (#any-of? @a "x" "y"))
      ((identifier) @b (#any-of? @b "p" "q" "r"))
    "#;
    let rewritten = rewrite_any_of_predicates(query);
    assert_eq!(rewritten.matches("#match?").count(), 2);
    assert!(!rewritten.contains("#any-of?"));
    assert!(rewritten.contains("^(x|y)$"));
    assert!(rewritten.contains("^(p|q|r)$"));
  }

  /// Regex metacharacters inside values are escaped.
  #[test]
  fn test_rewrite_any_of_regex_escaping() {
    let query = r#"((identifier) @a (#any-of? @a "foo.bar" "baz"))"#;
    let rewritten = rewrite_any_of_predicates(query);
    assert!(rewritten.contains("foo\\.bar"));
  }

  /// Queries without `#any-of?` come back unchanged.
  #[test]
  fn test_rewrite_any_of_no_match_passthrough() {
    let query = "(foo) @bar (#eq? @bar \"baz\")";
    let rewritten = rewrite_any_of_predicates(query);
    assert_eq!(query, rewritten);
  }

  /// Mirrors the injection query shipped in nvf's nix.nix.
  #[test]
  fn test_rewrite_any_of_nvf_nix_query() {
    let query = r#"
;; extends

((apply_expression
  function: (variable_expression
    name: (identifier) @_func
    (#any-of? @_func "mkLuaInline" "entryAnywhere"))
  argument: (indented_string_expression
    (string_fragment) @injection.content))
(#set! injection.language "lua")
(#set! injection.combined))
"#;
    let rewritten = rewrite_any_of_predicates(query);
    assert!(!rewritten.contains("#any-of?"));
    assert!(
      rewritten.contains("#match? @_func \"^(mkLuaInline|entryAnywhere)$\"")
    );
    // Everything that is not an `#any-of?` predicate must survive untouched.
    assert!(rewritten.contains("#set! injection.language"));
    assert!(rewritten.contains("#set! injection.combined"));
    assert!(rewritten.contains(";; extends"));
  }
}