arborium/
lib.rs

1//! Arborium — High-performance syntax highlighting
2//!
3//! Arborium provides batteries-included syntax highlighting powered by tree-sitter.
4//! It supports 60+ languages with automatic language injection (e.g., CSS/JS in HTML).
5//!
6//! # Quick Start
7//!
8//! ```rust,ignore
9//! use arborium::Highlighter;
10//!
11//! let mut hl = Highlighter::new();
12//! let html = hl.highlight("rust", "fn main() {}")?;
13//! // Output: <a-k>fn</a-k> <a-f>main</a-f>() {}
14//! ```
15//!
16//! # HTML vs ANSI Output
17//!
18//! Use [`Highlighter`] for HTML output (web pages, documentation):
19//!
20//! ```rust,ignore
21//! use arborium::{Highlighter, Config, HtmlFormat};
22//!
23//! // Default: custom elements (<a-k>, <a-f>, etc.)
24//! let mut hl = Highlighter::new();
25//!
26//! // Or use class-based output for CSS compatibility
27//! let config = Config {
28//!     html_format: HtmlFormat::ClassNames,
29//!     ..Default::default()
30//! };
31//! let mut hl = Highlighter::with_config(config);
32//! ```
33//!
34//! Use [`AnsiHighlighter`] for terminal output:
35//!
36//! ```rust,ignore
37//! use arborium::AnsiHighlighter;
38//! use arborium::theme::builtin;
39//!
40//! let theme = builtin::catppuccin_mocha().clone();
41//! let mut hl = AnsiHighlighter::new(theme);
42//! let colored = hl.highlight("rust", "fn main() {}")?;
43//! println!("{}", colored);
44//! ```
45//!
46//! # Language Support
47//!
48//! Enable languages via feature flags:
49//!
50//! ```toml
51//! [dependencies]
52//! arborium = { version = "0.1", features = ["lang-rust", "lang-python"] }
53//! ```
54//!
55//! Or enable all languages:
56//!
57//! ```toml
58//! [dependencies]
59//! arborium = { version = "0.1", features = ["all-languages"] }
60//! ```
61//!
62//! # Advanced Usage
63//!
64//! For building custom grammar providers or working with raw spans, see the
65//! [`advanced`] module.
66
67// Internal modules
68mod error;
69mod highlighter;
70pub(crate) mod store;
71
72// Public modules
73pub mod advanced;
74
75/// Theme system for ANSI output.
76///
77/// Re-exports types from `arborium-theme` for configuring syntax colors.
78pub mod theme {
79    pub use arborium_theme::theme::{builtin, Color, Modifiers, Style, Theme};
80}
81
82// Primary API exports
83pub use error::Error;
84pub use highlighter::{AnsiHighlighter, Highlighter};
85pub use store::GrammarStore;
86
87// Configuration types (re-exported from arborium-highlight)
88pub use arborium_highlight::HtmlFormat;
89
90/// Configuration for highlighting.
91///
92/// Controls injection depth and HTML output format.
93#[derive(Debug, Clone)]
94pub struct Config {
95    /// Maximum depth for processing language injections.
96    ///
97    /// - `0`: No injections (just primary language)
98    /// - `3`: Default, handles most cases (HTML with CSS/JS, Markdown with code blocks)
99    /// - Higher: For deeply nested content
100    pub max_injection_depth: u32,
101
102    /// HTML output format.
103    ///
104    /// See [`HtmlFormat`] for options.
105    pub html_format: HtmlFormat,
106}
107
108impl Default for Config {
109    fn default() -> Self {
110        Self {
111            max_injection_depth: 3,
112            html_format: HtmlFormat::default(),
113        }
114    }
115}
116
117impl From<Config> for arborium_highlight::HighlightConfig {
118    fn from(config: Config) -> Self {
119        arborium_highlight::HighlightConfig {
120            max_injection_depth: config.max_injection_depth,
121            html_format: config.html_format,
122        }
123    }
124}
125
126// Tree-sitter re-export for advanced users
127pub use arborium_tree_sitter as tree_sitter;
128
129// WASM allocator (automatically enabled on WASM targets)
130// Provides malloc/calloc/realloc/free symbols for tree-sitter's C code
131#[cfg(target_family = "wasm")]
132mod wasm;
133
134// Highlight names constant
135use arborium_theme::highlights;
136
137/// Standard highlight names used for syntax highlighting.
138///
139/// These names are used to configure tree-sitter's `HighlightConfiguration`.
140/// The indices correspond to HTML element tags (e.g., index 7 = `<a-k>` for keyword).
141pub const HIGHLIGHT_NAMES: [&str; highlights::COUNT] = highlights::names();
142
/// Detect the language from a file path or name.
///
/// Only the final path component is considered; both `/` and `\` are treated
/// as path separators. The text after the last `.` of that component (or the
/// whole component when it contains no `.`) is lowercased and looked up in a
/// table of known extensions and language aliases.
///
/// Returns the canonical language identifier, or `None` if the extension or
/// name is not recognized.
///
/// # Example
///
/// ```rust
/// use arborium::detect_language;
///
/// assert_eq!(detect_language("main.rs"), Some("rust"));
/// assert_eq!(detect_language("/path/to/script.py"), Some("python"));
/// assert_eq!(detect_language("styles.css"), Some("css"));
/// assert_eq!(detect_language("unknown.xyz"), None);
///
/// // Extension-less names resolve the same way with or without a directory.
/// assert_eq!(detect_language("Dockerfile"), Some("dockerfile"));
/// assert_eq!(detect_language("docker/Dockerfile"), Some("dockerfile"));
/// ```
pub fn detect_language(path: &str) -> Option<&'static str> {
    // Isolate the final path component first. Splitting the whole path on '.'
    // would let a dot inside a directory name (or the mere presence of a
    // directory) hide an extension-less file name such as `app/Dockerfile`.
    let file_name = path
        .rsplit(|c: char| c == '/' || c == '\\')
        .next()
        .unwrap_or(path);

    // Text after the last dot; for a name without a dot `rsplit` yields the
    // whole name, which lets bare names/aliases ("Dockerfile") match too.
    let ext = file_name.rsplit('.').next().unwrap_or(file_name);

    // Map extension to canonical language ID (case-insensitive).
    Some(match ext.to_lowercase().as_str() {
        "ada" => "ada",
        "adoc" => "asciidoc",
        "agda" => "agda",
        "asciidoc" => "asciidoc",
        "asm" => "asm",
        "assembly" => "asm",
        "awk" => "awk",
        "bash" => "bash",
        "bat" => "batch",
        "batch" => "batch",
        "bazel" => "starlark",
        "bzl" => "starlark",
        "c" => "c",
        "c++" => "cpp",
        "c-sharp" => "c-sharp",
        "caddy" => "caddy",
        "capnp" => "capnp",
        "cfg" => "ini",
        "cjs" => "javascript",
        "cl" => "commonlisp",
        "clj" => "clojure",
        "clojure" => "clojure",
        "cmake" => "cmake",
        "cmd" => "batch",
        "commonlisp" => "commonlisp",
        "conf" => "ini",
        "cpp" => "cpp",
        "cs" => "c-sharp",
        "csharp" => "c-sharp",
        "css" => "css",
        "cts" => "typescript",
        "cxx" => "cpp",
        "d" => "d",
        "dart" => "dart",
        "devicetree" => "devicetree",
        "diff" => "diff",
        "dlang" => "d",
        "docker" => "dockerfile",
        "dockerfile" => "dockerfile",
        "dot" => "dot",
        "el" => "elisp",
        "elisp" => "elisp",
        "elixir" => "elixir",
        "elm" => "elm",
        "emacs-lisp" => "elisp",
        "erl" => "erlang",
        "erlang" => "erlang",
        "ex" => "elixir",
        "exs" => "elixir",
        "f#" => "fsharp",
        "fish" => "fish",
        "frag" => "glsl",
        "fs" => "fsharp",
        "fsharp" => "fsharp",
        "gleam" => "gleam",
        "glsl" => "glsl",
        "go" => "go",
        "golang" => "go",
        "gql" => "graphql",
        "graphql" => "graphql",
        "h" => "c",
        "haskell" => "haskell",
        "hcl" => "hcl",
        "hlsl" => "hlsl",
        "hpp" => "cpp",
        "hs" => "haskell",
        "htm" => "html",
        "html" => "html",
        "idr" => "idris",
        "idris" => "idris",
        "ini" => "ini",
        "j2" => "jinja2",
        "java" => "java",
        "javascript" => "javascript",
        "jinja" => "jinja2",
        "jinja2" => "jinja2",
        "jl" => "julia",
        "jq" => "jq",
        "js" => "javascript",
        "json" => "json",
        "jsonc" => "json",
        "jsx" => "javascript",
        "julia" => "julia",
        "kdl" => "kdl",
        "kotlin" => "kotlin",
        "kt" => "kotlin",
        "kts" => "kotlin",
        "lean" => "lean",
        "lisp" => "commonlisp",
        "lua" => "lua",
        "m" => "matlab",
        "markdown" => "markdown",
        "matlab" => "matlab",
        "md" => "markdown",
        "mdx" => "markdown",
        "meson" => "meson",
        "mjs" => "javascript",
        "ml" => "ocaml",
        "mm" => "objc",
        "mts" => "typescript",
        "mysql" => "sql",
        "nasm" => "x86asm",
        "nginx" => "nginx",
        "ninja" => "ninja",
        "nix" => "nix",
        "objc" => "objc",
        "objective-c" => "objc",
        "ocaml" => "ocaml",
        "patch" => "diff",
        "pbtxt" => "textproto",
        "perl" => "perl",
        "php" => "php",
        "pl" => "perl",
        "pm" => "perl",
        "postgres" => "sql",
        "postgresql" => "sql",
        "postscript" => "postscript",
        "powershell" => "powershell",
        "pro" => "prolog",
        "prolog" => "prolog",
        "ps" => "postscript",
        "ps1" => "powershell",
        "pwsh" => "powershell",
        "py" => "python",
        "py3" => "python",
        "python" => "python",
        "python3" => "python",
        "query" => "query",
        "r" => "r",
        "rb" => "ruby",
        "res" => "rescript",
        "rescript" => "rescript",
        "rkt" => "scheme",
        "rlang" => "r",
        "ron" => "ron",
        "rq" => "sparql",
        "rs" => "rust",
        "ruby" => "ruby",
        "rust" => "rust",
        "sass" => "scss",
        "scala" => "scala",
        "scheme" => "scheme",
        "scm" => "query",
        "scss" => "scss",
        "sh" => "bash",
        "shell" => "bash",
        "sparql" => "sparql",
        "sql" => "sql",
        "sqlite" => "sql",
        "ss" => "scheme",
        "ssh-config" => "ssh-config",
        "starlark" => "starlark",
        "sv" => "verilog",
        "svelte" => "svelte",
        "svg" => "xml",
        "swift" => "swift",
        "systemverilog" => "verilog",
        "terraform" => "hcl",
        "textpb" => "textproto",
        "textproto" => "textproto",
        "tf" => "hcl",
        "thrift" => "thrift",
        "tla" => "tlaplus",
        "tlaplus" => "tlaplus",
        "toml" => "toml",
        "ts" => "typescript",
        "tsx" => "tsx",
        "typ" => "typst",
        "typescript" => "typescript",
        "typst" => "typst",
        "ua" => "uiua",
        "uiua" => "uiua",
        "v" => "verilog",
        "vb" => "vb",
        "vbnet" => "vb",
        "verilog" => "verilog",
        "vert" => "glsl",
        "vhd" => "vhdl",
        "vhdl" => "vhdl",
        "vim" => "vim",
        "viml" => "vim",
        "vimscript" => "vim",
        "visualbasic" => "vb",
        "vue" => "vue",
        "x86" => "x86asm",
        "x86asm" => "x86asm",
        "xml" => "xml",
        "xsl" => "xml",
        "xslt" => "xml",
        "yaml" => "yaml",
        "yml" => "yaml",
        "yuri" => "yuri",
        "zig" => "zig",
        "zsh" => "zsh",
        _ => return None,
    })
}
559
560// =============================================================================
561// Language grammar re-exports based on enabled features.
562// Each module provides:
563// - `language()` - Returns the tree-sitter Language
564// - `HIGHLIGHTS_QUERY` - The highlight query string
565// - `INJECTIONS_QUERY` - The injection query string
566// - `LOCALS_QUERY` - The locals query string
567// =============================================================================
568
569
570#[cfg(feature = "lang-ada")]
571pub use arborium_ada as lang_ada;
572
573
574#[cfg(feature = "lang-agda")]
575pub use arborium_agda as lang_agda;
576
577
578#[cfg(feature = "lang-asciidoc")]
579pub use arborium_asciidoc as lang_asciidoc;
580
581
582#[cfg(feature = "lang-asm")]
583pub use arborium_asm as lang_asm;
584
585
586#[cfg(feature = "lang-awk")]
587pub use arborium_awk as lang_awk;
588
589
590#[cfg(feature = "lang-bash")]
591pub use arborium_bash as lang_bash;
592
593
594#[cfg(feature = "lang-batch")]
595pub use arborium_batch as lang_batch;
596
597
598#[cfg(feature = "lang-c")]
599pub use arborium_c as lang_c;
600
601
602#[cfg(feature = "lang-c-sharp")]
603pub use arborium_c_sharp as lang_c_sharp;
604
605
606#[cfg(feature = "lang-caddy")]
607pub use arborium_caddy as lang_caddy;
608
609
610#[cfg(feature = "lang-capnp")]
611pub use arborium_capnp as lang_capnp;
612
613
614#[cfg(feature = "lang-clojure")]
615pub use arborium_clojure as lang_clojure;
616
617
618#[cfg(feature = "lang-cmake")]
619pub use arborium_cmake as lang_cmake;
620
621
622#[cfg(feature = "lang-commonlisp")]
623pub use arborium_commonlisp as lang_commonlisp;
624
625
626#[cfg(feature = "lang-cpp")]
627pub use arborium_cpp as lang_cpp;
628
629
630#[cfg(feature = "lang-css")]
631pub use arborium_css as lang_css;
632
633
634#[cfg(feature = "lang-d")]
635pub use arborium_d as lang_d;
636
637
638#[cfg(feature = "lang-dart")]
639pub use arborium_dart as lang_dart;
640
641
642#[cfg(feature = "lang-devicetree")]
643pub use arborium_devicetree as lang_devicetree;
644
645
646#[cfg(feature = "lang-diff")]
647pub use arborium_diff as lang_diff;
648
649
650#[cfg(feature = "lang-dockerfile")]
651pub use arborium_dockerfile as lang_dockerfile;
652
653
654#[cfg(feature = "lang-dot")]
655pub use arborium_dot as lang_dot;
656
657
658#[cfg(feature = "lang-elisp")]
659pub use arborium_elisp as lang_elisp;
660
661
662#[cfg(feature = "lang-elixir")]
663pub use arborium_elixir as lang_elixir;
664
665
666#[cfg(feature = "lang-elm")]
667pub use arborium_elm as lang_elm;
668
669
670#[cfg(feature = "lang-erlang")]
671pub use arborium_erlang as lang_erlang;
672
673
674#[cfg(feature = "lang-fish")]
675pub use arborium_fish as lang_fish;
676
677
678#[cfg(feature = "lang-fsharp")]
679pub use arborium_fsharp as lang_fsharp;
680
681
682#[cfg(feature = "lang-gleam")]
683pub use arborium_gleam as lang_gleam;
684
685
686#[cfg(feature = "lang-glsl")]
687pub use arborium_glsl as lang_glsl;
688
689
690#[cfg(feature = "lang-go")]
691pub use arborium_go as lang_go;
692
693
694#[cfg(feature = "lang-graphql")]
695pub use arborium_graphql as lang_graphql;
696
697
698#[cfg(feature = "lang-haskell")]
699pub use arborium_haskell as lang_haskell;
700
701
702#[cfg(feature = "lang-hcl")]
703pub use arborium_hcl as lang_hcl;
704
705
706#[cfg(feature = "lang-hlsl")]
707pub use arborium_hlsl as lang_hlsl;
708
709
710#[cfg(feature = "lang-html")]
711pub use arborium_html as lang_html;
712
713
714#[cfg(feature = "lang-idris")]
715pub use arborium_idris as lang_idris;
716
717
718#[cfg(feature = "lang-ini")]
719pub use arborium_ini as lang_ini;
720
721
722#[cfg(feature = "lang-java")]
723pub use arborium_java as lang_java;
724
725
726#[cfg(feature = "lang-javascript")]
727pub use arborium_javascript as lang_javascript;
728
729
730#[cfg(feature = "lang-jinja2")]
731pub use arborium_jinja2 as lang_jinja2;
732
733
734#[cfg(feature = "lang-jq")]
735pub use arborium_jq as lang_jq;
736
737
738#[cfg(feature = "lang-json")]
739pub use arborium_json as lang_json;
740
741
742#[cfg(feature = "lang-julia")]
743pub use arborium_julia as lang_julia;
744
745
746#[cfg(feature = "lang-kdl")]
747pub use arborium_kdl as lang_kdl;
748
749
750#[cfg(feature = "lang-kotlin")]
751pub use arborium_kotlin as lang_kotlin;
752
753
754#[cfg(feature = "lang-lean")]
755pub use arborium_lean as lang_lean;
756
757
758#[cfg(feature = "lang-lua")]
759pub use arborium_lua as lang_lua;
760
761
762#[cfg(feature = "lang-markdown")]
763pub use arborium_markdown as lang_markdown;
764
765
766#[cfg(feature = "lang-matlab")]
767pub use arborium_matlab as lang_matlab;
768
769
770#[cfg(feature = "lang-meson")]
771pub use arborium_meson as lang_meson;
772
773
774#[cfg(feature = "lang-nginx")]
775pub use arborium_nginx as lang_nginx;
776
777
778#[cfg(feature = "lang-ninja")]
779pub use arborium_ninja as lang_ninja;
780
781
782#[cfg(feature = "lang-nix")]
783pub use arborium_nix as lang_nix;
784
785
786#[cfg(feature = "lang-objc")]
787pub use arborium_objc as lang_objc;
788
789
790#[cfg(feature = "lang-ocaml")]
791pub use arborium_ocaml as lang_ocaml;
792
793
794#[cfg(feature = "lang-perl")]
795pub use arborium_perl as lang_perl;
796
797
798#[cfg(feature = "lang-php")]
799pub use arborium_php as lang_php;
800
801
802#[cfg(feature = "lang-postscript")]
803pub use arborium_postscript as lang_postscript;
804
805
806#[cfg(feature = "lang-powershell")]
807pub use arborium_powershell as lang_powershell;
808
809
810#[cfg(feature = "lang-prolog")]
811pub use arborium_prolog as lang_prolog;
812
813
814#[cfg(feature = "lang-python")]
815pub use arborium_python as lang_python;
816
817
818#[cfg(feature = "lang-query")]
819pub use arborium_query as lang_query;
820
821
822#[cfg(feature = "lang-r")]
823pub use arborium_r as lang_r;
824
825
826#[cfg(feature = "lang-rescript")]
827pub use arborium_rescript as lang_rescript;
828
829
830#[cfg(feature = "lang-ron")]
831pub use arborium_ron as lang_ron;
832
833
834#[cfg(feature = "lang-ruby")]
835pub use arborium_ruby as lang_ruby;
836
837
838#[cfg(feature = "lang-rust")]
839pub use arborium_rust as lang_rust;
840
841
842#[cfg(feature = "lang-scala")]
843pub use arborium_scala as lang_scala;
844
845
846#[cfg(feature = "lang-scheme")]
847pub use arborium_scheme as lang_scheme;
848
849
850#[cfg(feature = "lang-scss")]
851pub use arborium_scss as lang_scss;
852
853
854#[cfg(feature = "lang-sparql")]
855pub use arborium_sparql as lang_sparql;
856
857
858#[cfg(feature = "lang-sql")]
859pub use arborium_sql as lang_sql;
860
861
862#[cfg(feature = "lang-ssh-config")]
863pub use arborium_ssh_config as lang_ssh_config;
864
865
866#[cfg(feature = "lang-starlark")]
867pub use arborium_starlark as lang_starlark;
868
869
870#[cfg(feature = "lang-svelte")]
871pub use arborium_svelte as lang_svelte;
872
873
874#[cfg(feature = "lang-swift")]
875pub use arborium_swift as lang_swift;
876
877
878#[cfg(feature = "lang-textproto")]
879pub use arborium_textproto as lang_textproto;
880
881
882#[cfg(feature = "lang-thrift")]
883pub use arborium_thrift as lang_thrift;
884
885
886#[cfg(feature = "lang-tlaplus")]
887pub use arborium_tlaplus as lang_tlaplus;
888
889
890#[cfg(feature = "lang-toml")]
891pub use arborium_toml as lang_toml;
892
893
894#[cfg(feature = "lang-tsx")]
895pub use arborium_tsx as lang_tsx;
896
897
898#[cfg(feature = "lang-typescript")]
899pub use arborium_typescript as lang_typescript;
900
901
902#[cfg(feature = "lang-typst")]
903pub use arborium_typst as lang_typst;
904
905
906#[cfg(feature = "lang-uiua")]
907pub use arborium_uiua as lang_uiua;
908
909
910#[cfg(feature = "lang-vb")]
911pub use arborium_vb as lang_vb;
912
913
914#[cfg(feature = "lang-verilog")]
915pub use arborium_verilog as lang_verilog;
916
917
918#[cfg(feature = "lang-vhdl")]
919pub use arborium_vhdl as lang_vhdl;
920
921
922#[cfg(feature = "lang-vim")]
923pub use arborium_vim as lang_vim;
924
925
926#[cfg(feature = "lang-vue")]
927pub use arborium_vue as lang_vue;
928
929
930#[cfg(feature = "lang-x86asm")]
931pub use arborium_x86asm as lang_x86asm;
932
933
934#[cfg(feature = "lang-xml")]
935pub use arborium_xml as lang_xml;
936
937
938#[cfg(feature = "lang-yaml")]
939pub use arborium_yaml as lang_yaml;
940
941
942#[cfg(feature = "lang-yuri")]
943pub use arborium_yuri as lang_yuri;
944
945
946#[cfg(feature = "lang-zig")]
947pub use arborium_zig as lang_zig;
948
949
950#[cfg(feature = "lang-zsh")]
951pub use arborium_zsh as lang_zsh;
952