dialect/lib.rs
1//! Types and traits for implementing syntax highlighting.
2
3#![warn(missing_debug_implementations, missing_docs, rust_2018_idioms)]
4
5use unicode_width::UnicodeWidthStr;
6
7pub mod themes;
8
9/// This trait is to be implemented by any type that syntax highlights source code for a particular
10/// language. This is done by taking in a string slice and outputting a vector of
11/// [`HighlightedSpan`](struct.HighlightedSpan.html)s.
12pub trait Highlight {
13 #[allow(missing_docs)]
14 fn highlight(&self, input: &str) -> Vec<HighlightedSpan>;
15}
16
17/// An individual fragment of highlighted text.
18#[derive(Debug, Clone, Eq, PartialEq, Hash)]
19pub struct HighlightedSpan {
20 /// the region of text being highlighted
21 pub range: std::ops::Range<usize>,
22 /// the highlight group it has been assigned
23 pub group: HighlightGroup,
24}
25
26/// The set of possible syntactical forms text can be assigned.
27#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, strum_macros::EnumIter)]
28pub enum HighlightGroup {
29 /// a keyword that controls the flow of execution within code, e.g. `if` or `for`
30 CtrlFlowKeyword,
31 /// any other kind of keyword
32 OtherKeyword,
33 /// the name of a function when defined
34 FunctionDef,
35 /// the name of a function when called
36 FunctionCall,
37 /// the name of a type when defined
38 TyDef,
39 /// the name of a type when used
40 TyUse,
41 /// the name of an interface/trait/typeclass when defined
42 InterfaceDef,
43 /// the name of an interface/trait/typeclass when used
44 InterfaceUse,
45 /// a ‘primitive’ baked into the language (e.g. `int` is a primitive type in C++, but
46 /// `std::vector` isn’t)
47 PrimitiveTy,
48 /// the name of a variable when defined
49 VariableDef,
50 /// the name of a variable when used
51 VariableUse,
52 /// the name of a struct/class member when defined
53 MemberDef,
54 /// the name of a struct/class member when used
55 MemberUse,
56 /// the name of a constant ‘variable’ when defined
57 ConstantDef,
58 /// the name of a constant ‘variable’ when used
59 ConstantUse,
60 /// the name of a module when defined
61 ModuleDef,
62 /// the name of a module when used
63 ModuleUse,
64 /// the name of a macro when defined
65 MacroDef,
66 /// the name of a macro when used
67 MacroUse,
68 /// the name of a special identifier (e.g. a symbol in Ruby or a lifetime in Rust) when defined
69 SpecialIdentDef,
70 /// the name of a special identifier when used
71 SpecialIdentUse,
72 /// the name of a function parameter
73 FunctionParam,
74 /// a number literal (whether integer or floating-point)
75 Number,
76 /// a string literal
77 String,
78 /// the delimiters around a string literal (`"` in most languages)
79 StringDelimiter,
80 /// a character literal
81 Character,
82 /// the delimiters around a character literal (`'` in most languages)
83 CharacterDelimiter,
84 /// a boolean literal (only to be used if it is a keyword in the language -- if boolean values
85 /// are ‘just’ normal types like in Python and Haskell, then the `TyUse` variant is more
86 /// appropriate)
87 Boolean,
88 /// a pre-processor invocation that is not a macro itself (e.g. `#if` and `#define` in C)
89 PreProc,
90 /// the name of something that is an attribute of another thing (e.g. the word `derive` in
91 /// `#[derive(Debug)]` in Rust, or a decorator in Python)
92 Attribute,
93 /// a comment
94 Comment,
95 /// a documentation comment
96 DocComment,
97 /// an operator that accesses the members of something, regardless of whether this is some kind
98 /// of ‘object’ or a module, e.g. `.` and `::` in Rust
99 MemberOper,
100 /// an operator relating to pointers (e.g. `*` and `&` in C, those as well as `&mut` in Rust)
101 PointerOper,
102 /// an operator that assigns a value to a binding of some sort (`=` and `+=` are examples)
103 AssignOper,
104 /// an operator that has two operands (e.g. `+`, `||`)
105 BinaryOper,
106 /// any operator not covered by the other variants
107 OtherOper,
108 /// a delimiter (e.g. `(`)
109 Delimiter,
110 /// a separator of something (e.g. `,` or `->`)
111 Separator,
112 /// a terminator of something (e.g. `;`)
113 Terminator,
114 /// an error
115 Error,
116}
117
118/// An individual styled grapheme.
119#[derive(Debug, Clone, Eq, PartialEq, Hash)]
120pub struct StyledGrapheme {
121 /// the grapheme
122 pub grapheme: smol_str::SmolStr,
123 /// the style it has been given
124 pub style: ResolvedStyle,
125}
126
127impl UnicodeWidthStr for StyledGrapheme {
128 fn width(&self) -> usize {
129 self.grapheme.as_str().width()
130 }
131
132 fn width_cjk(&self) -> usize {
133 self.grapheme.as_str().width_cjk()
134 }
135}
136
/// A colour described by its red, green and blue channels.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct Rgb {
    /// The red channel.
    pub r: u8,
    /// The green channel.
    pub g: u8,
    /// The blue channel.
    pub b: u8,
}
147
148impl From<Rgb> for ansi_term::Colour {
149 fn from(rgb: Rgb) -> Self {
150 Self::RGB(rgb.r, rgb.g, rgb.b)
151 }
152}
153
/// Allows easy creation of a [`Rgb`](struct.Rgb.html).
///
/// The three arguments are the red, green and blue channels, in that order. Each may be any
/// expression that evaluates to a `u8` (a literal, a constant, a variable, arithmetic, and so
/// on), and a trailing comma is permitted.
#[macro_export]
macro_rules! rgb {
    // `expr` is a superset of `literal`, so every invocation that used to match still does.
    ($r:expr, $g:expr, $b:expr $(,)?) => {
        $crate::Rgb {
            r: $r,
            g: $g,
            b: $b,
        }
    };
}
165
166/// The styling applied to a given [`HighlightGroup`](enum.HighlightGroup.html).
167///
168/// When a field is given a `None` value, then that field’s value defaults to that of the theme’s
169/// default style. It was decided that only colours are to be optional, because it is exceedingly
170/// rare that an entire theme wishes to be bold, italic or underlined.
171#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Default)]
172pub struct Style {
173 /// its foreground colour
174 pub fg_color: Option<Rgb>,
175 /// its background colour
176 pub bg_color: Option<Rgb>,
177 /// whether to bolden
178 pub is_bold: bool,
179 /// whether to italicise
180 pub is_italic: bool,
181 /// whether to underline
182 pub is_underline: bool,
183}
184
185impl Style {
186 /// Creates a new Style with all colour fields set to `None` and all boolean fields set to
187 /// false, thereby creating a style whose value is identical to that of the theme’s default
188 /// style (assuming that the theme’s default style also uses false for all boolean options).
189 pub fn new() -> Self {
190 Self {
191 fg_color: None,
192 bg_color: None,
193 is_bold: false,
194 is_italic: false,
195 is_underline: false,
196 }
197 }
198
199 fn resolve(self, resolved: ResolvedStyle) -> ResolvedStyle {
200 ResolvedStyle {
201 fg_color: self.fg_color.unwrap_or(resolved.fg_color),
202 bg_color: self.bg_color.unwrap_or(resolved.bg_color),
203 is_bold: self.is_bold,
204 is_italic: self.is_italic,
205 is_underline: self.is_underline,
206 }
207 }
208}
209
210/// Identical to a [`Style`](struct.Style.html), except that all its fields are mandatory.
211#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
212pub struct ResolvedStyle {
213 /// its foreground colour
214 pub fg_color: Rgb,
215 /// its background colour
216 pub bg_color: Rgb,
217 /// whether to bolden
218 pub is_bold: bool,
219 /// whether to italicise
220 pub is_italic: bool,
221 /// whether to underline
222 pub is_underline: bool,
223}
224
225impl From<ResolvedStyle> for ansi_term::Style {
226 fn from(style: ResolvedStyle) -> Self {
227 Self {
228 foreground: Some(style.fg_color.into()),
229 background: Some(style.bg_color.into()),
230 is_bold: style.is_bold,
231 is_italic: style.is_italic,
232 is_underline: style.is_underline,
233
234 // These fields aren’t useful in the context of syntax highlighting, with the exception
235 // of ‘is_dimmed’. The reason why ‘is_dimmed’ cannot be used by theme authors is that
236 // its appearance depends on what colour the terminal picks, which can vary. This also
237 // ensures consistency.
238 is_dimmed: false,
239 is_blink: false,
240 is_reverse: false,
241 is_hidden: false,
242 is_strikethrough: false,
243 }
244 }
245}
246
247/// A trait for defining syntax highlighting themes.
248pub trait Theme {
249 /// The style for unhighlighted text. To understand why this must be a fully resolved style,
250 /// consider the following example:
251 ///
252 /// - `default_style` returns a [`Style`](struct.Style.html) which omits a foreground colour -
253 /// at some point a [highlighter](trait.Highlight.html) returns a
254 /// [`HighlightedSpan`](struct.HighlightedSpan.html) without a highlight group
255 /// - when [`render`](fn.render.html) is called, what is the foreground colour of this
256 /// unhighlighted HighlightedSpan?
257 ///
258 /// To prevent situations like this, `default_style` acts as a fallback for all cases by forcing
259 /// the implementor to define all of the style’s fields.
260 fn default_style(&self) -> ResolvedStyle;
261
262 /// Provides a mapping from `HighlightGroup`s to `Style`s. As `HighlightGroup`s contain a
263 /// variant for unhighlighted text, this thereby defines the appearance of the whole text
264 /// field.
265 fn style(&self, group: HighlightGroup) -> Style;
266}
267
268/// A convenience function that renders a given input text using a given highlighter and theme,
269/// returning a vector of `StyledGrapheme`s.
270pub fn render<H, T>(input: &str, highlighter: H, theme: T) -> Vec<StyledGrapheme>
271where
272 H: Highlight,
273 T: Theme,
274{
275 use std::collections::HashMap;
276 use strum::IntoEnumIterator;
277 use unicode_segmentation::UnicodeSegmentation;
278
279 // The key is the highlight group, the value is the style the theme gives to this group.
280 let styles: HashMap<_, _> = HighlightGroup::iter()
281 .map(|group| (group, theme.style(group)))
282 .collect();
283
284 let spans = highlighter.highlight(input);
285
286 let num_chars = input.chars().count();
287 let mut output = Vec::with_capacity(num_chars);
288
289 'graphemes: for (idx, grapheme) in input.grapheme_indices(true) {
290 let grapheme = smol_str::SmolStr::from(grapheme);
291
292 for span in spans.iter() {
293 // We’ve found the span that contains the current grapheme, so we add the grapheme to
294 // the output and go to the next grapheme.
295 if span.range.contains(&idx) {
296 output.push(StyledGrapheme {
297 grapheme,
298 style: styles[&span.group].resolve(theme.default_style()),
299 });
300 continue 'graphemes;
301 }
302 }
303
304 // At this point the grapheme has not been found in any of the spans outputted by the
305 // highlighter, meaning that it has not been styled. This means we should give it the
306 // theme’s default style.
307 output.push(StyledGrapheme {
308 grapheme,
309 style: theme.default_style(),
310 });
311 }
312
313 output
314}