Skip to main content

dialect/
lib.rs

1//! Types and traits for implementing syntax highlighting.
2
3#![warn(missing_debug_implementations, missing_docs, rust_2018_idioms)]
4
5use unicode_width::UnicodeWidthStr;
6
7pub mod themes;
8
9/// This trait is to be implemented by any type that syntax highlights source code for a particular
10/// language. This is done by taking in a string slice and outputting a vector of
11/// [`HighlightedSpan`](struct.HighlightedSpan.html)s.
12pub trait Highlight {
13    #[allow(missing_docs)]
14    fn highlight(&self, input: &str) -> Vec<HighlightedSpan>;
15}
16
17/// An individual fragment of highlighted text.
18#[derive(Debug, Clone, Eq, PartialEq, Hash)]
19pub struct HighlightedSpan {
20    /// the region of text being highlighted
21    pub range: std::ops::Range<usize>,
22    /// the highlight group it has been assigned
23    pub group: HighlightGroup,
24}
25
26/// The set of possible syntactical forms text can be assigned.
27#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, strum_macros::EnumIter)]
28pub enum HighlightGroup {
29    /// a keyword that controls the flow of execution within code, e.g. `if` or `for`
30    CtrlFlowKeyword,
31    /// any other kind of keyword
32    OtherKeyword,
33    /// the name of a function when defined
34    FunctionDef,
35    /// the name of a function when called
36    FunctionCall,
37    /// the name of a type when defined
38    TyDef,
39    /// the name of a type when used
40    TyUse,
41    /// the name of an interface/trait/typeclass when defined
42    InterfaceDef,
43    /// the name of an interface/trait/typeclass when used
44    InterfaceUse,
45    /// a ‘primitive’ baked into the language (e.g. `int` is a primitive type in C++, but
46    /// `std::vector` isn’t)
47    PrimitiveTy,
48    /// the name of a variable when defined
49    VariableDef,
50    /// the name of a variable when used
51    VariableUse,
52    /// the name of a struct/class member when defined
53    MemberDef,
54    /// the name of a struct/class member when used
55    MemberUse,
56    /// the name of a constant ‘variable’ when defined
57    ConstantDef,
58    /// the name of a constant ‘variable’ when used
59    ConstantUse,
60    /// the name of a module when defined
61    ModuleDef,
62    /// the name of a module when used
63    ModuleUse,
64    /// the name of a macro when defined
65    MacroDef,
66    /// the name of a macro when used
67    MacroUse,
68    /// the name of a special identifier (e.g. a symbol in Ruby or a lifetime in Rust) when defined
69    SpecialIdentDef,
70    /// the name of a special identifier when used
71    SpecialIdentUse,
72    /// the name of a function parameter
73    FunctionParam,
74    /// a number literal (whether integer or floating-point)
75    Number,
76    /// a string literal
77    String,
78    /// the delimiters around a string literal (`"` in most languages)
79    StringDelimiter,
80    /// a character literal
81    Character,
82    /// the delimiters around a character literal (`'` in most languages)
83    CharacterDelimiter,
84    /// a boolean literal (only to be used if it is a keyword in the language -- if boolean values
85    /// are ‘just’ normal types like in Python and Haskell, then the `TyUse` variant is more
86    /// appropriate)
87    Boolean,
88    /// a pre-processor invocation that is not a macro itself (e.g. `#if` and `#define` in C)
89    PreProc,
90    /// the name of something that is an attribute of another thing (e.g. the word `derive` in
91    /// `#[derive(Debug)]` in Rust, or a decorator in Python)
92    Attribute,
93    /// a comment
94    Comment,
95    /// a documentation comment
96    DocComment,
97    /// an operator that accesses the members of something, regardless of whether this is some kind
98    /// of ‘object’ or a module, e.g. `.` and `::` in Rust
99    MemberOper,
100    /// an operator relating to pointers (e.g. `*` and `&` in C, those as well as `&mut` in Rust)
101    PointerOper,
102    /// an operator that assigns a value to a binding of some sort (`=` and `+=` are examples)
103    AssignOper,
104    /// an operator that has two operands (e.g. `+`, `||`)
105    BinaryOper,
106    /// any operator not covered by the other variants
107    OtherOper,
108    /// a delimiter (e.g. `(`)
109    Delimiter,
110    /// a separator of something (e.g. `,` or `->`)
111    Separator,
112    /// a terminator of something (e.g. `;`)
113    Terminator,
114    /// an error
115    Error,
116}
117
118/// An individual styled grapheme.
119#[derive(Debug, Clone, Eq, PartialEq, Hash)]
120pub struct StyledGrapheme {
121    /// the grapheme
122    pub grapheme: smol_str::SmolStr,
123    /// the style it has been given
124    pub style: ResolvedStyle,
125}
126
127impl UnicodeWidthStr for StyledGrapheme {
128    fn width(&self) -> usize {
129        self.grapheme.as_str().width()
130    }
131
132    fn width_cjk(&self) -> usize {
133        self.grapheme.as_str().width_cjk()
134    }
135}
136
/// A colour expressed as red, green and blue components.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Rgb {
    /// the red component
    pub r: u8,
    /// the green component
    pub g: u8,
    /// the blue component
    pub b: u8,
}
147
148impl From<Rgb> for ansi_term::Colour {
149    fn from(rgb: Rgb) -> Self {
150        Self::RGB(rgb.r, rgb.g, rgb.b)
151    }
152}
153
/// Allows easy creation of a [`Rgb`](struct.Rgb.html).
///
/// Takes the red, green and blue components, in that order. Each component may be any
/// expression that evaluates to a `u8` (a literal, a constant, a variable, …), and a trailing
/// comma after the last component is permitted.
#[macro_export]
macro_rules! rgb {
    ($r:expr, $g:expr, $b:expr $(,)?) => {
        $crate::Rgb {
            r: $r,
            g: $g,
            b: $b,
        }
    };
}
165
166/// The styling applied to a given [`HighlightGroup`](enum.HighlightGroup.html).
167///
168/// When a field is given a `None` value, then that field’s value defaults to that of the theme’s
169/// default style. It was decided that only colours are to be optional, because it is exceedingly
170/// rare that an entire theme wishes to be bold, italic or underlined.
171#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Default)]
172pub struct Style {
173    /// its foreground colour
174    pub fg_color: Option<Rgb>,
175    /// its background colour
176    pub bg_color: Option<Rgb>,
177    /// whether to bolden
178    pub is_bold: bool,
179    /// whether to italicise
180    pub is_italic: bool,
181    /// whether to underline
182    pub is_underline: bool,
183}
184
185impl Style {
186    /// Creates a new Style with all colour fields set to `None` and all boolean fields set to
187    /// false, thereby creating a style whose value is identical to that of the theme’s default
188    /// style (assuming that the theme’s default style also uses false for all boolean options).
189    pub fn new() -> Self {
190        Self {
191            fg_color: None,
192            bg_color: None,
193            is_bold: false,
194            is_italic: false,
195            is_underline: false,
196        }
197    }
198
199    fn resolve(self, resolved: ResolvedStyle) -> ResolvedStyle {
200        ResolvedStyle {
201            fg_color: self.fg_color.unwrap_or(resolved.fg_color),
202            bg_color: self.bg_color.unwrap_or(resolved.bg_color),
203            is_bold: self.is_bold,
204            is_italic: self.is_italic,
205            is_underline: self.is_underline,
206        }
207    }
208}
209
210/// Identical to a [`Style`](struct.Style.html), except that all its fields are mandatory.
211#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
212pub struct ResolvedStyle {
213    /// its foreground colour
214    pub fg_color: Rgb,
215    /// its background colour
216    pub bg_color: Rgb,
217    /// whether to bolden
218    pub is_bold: bool,
219    /// whether to italicise
220    pub is_italic: bool,
221    /// whether to underline
222    pub is_underline: bool,
223}
224
225impl From<ResolvedStyle> for ansi_term::Style {
226    fn from(style: ResolvedStyle) -> Self {
227        Self {
228            foreground: Some(style.fg_color.into()),
229            background: Some(style.bg_color.into()),
230            is_bold: style.is_bold,
231            is_italic: style.is_italic,
232            is_underline: style.is_underline,
233
234            // These fields aren’t useful in the context of syntax highlighting, with the exception
235            // of ‘is_dimmed’. The reason why ‘is_dimmed’ cannot be used by theme authors is that
236            // its appearance depends on what colour the terminal picks, which can vary. This also
237            // ensures consistency.
238            is_dimmed: false,
239            is_blink: false,
240            is_reverse: false,
241            is_hidden: false,
242            is_strikethrough: false,
243        }
244    }
245}
246
247/// A trait for defining syntax highlighting themes.
248pub trait Theme {
249    /// The style for unhighlighted text. To understand why this must be a fully resolved style,
250    /// consider the following example:
251    ///
252    /// - `default_style` returns a [`Style`](struct.Style.html) which omits a foreground colour -
253    ///   at some point a [highlighter](trait.Highlight.html) returns a
254    ///   [`HighlightedSpan`](struct.HighlightedSpan.html) without a highlight group
255    /// - when [`render`](fn.render.html) is called, what is the foreground colour of this
256    ///   unhighlighted HighlightedSpan?
257    ///
258    /// To prevent situations like this, `default_style` acts as a fallback for all cases by forcing
259    /// the implementor to define all of the style’s fields.
260    fn default_style(&self) -> ResolvedStyle;
261
262    /// Provides a mapping from `HighlightGroup`s to `Style`s. As `HighlightGroup`s contain a
263    /// variant for unhighlighted text, this thereby defines the appearance of the whole text
264    /// field.
265    fn style(&self, group: HighlightGroup) -> Style;
266}
267
268/// A convenience function that renders a given input text using a given highlighter and theme,
269/// returning a vector of `StyledGrapheme`s.
270pub fn render<H, T>(input: &str, highlighter: H, theme: T) -> Vec<StyledGrapheme>
271where
272    H: Highlight,
273    T: Theme,
274{
275    use std::collections::HashMap;
276    use strum::IntoEnumIterator;
277    use unicode_segmentation::UnicodeSegmentation;
278
279    // The key is the highlight group, the value is the style the theme gives to this group.
280    let styles: HashMap<_, _> = HighlightGroup::iter()
281        .map(|group| (group, theme.style(group)))
282        .collect();
283
284    let spans = highlighter.highlight(input);
285
286    let num_chars = input.chars().count();
287    let mut output = Vec::with_capacity(num_chars);
288
289    'graphemes: for (idx, grapheme) in input.grapheme_indices(true) {
290        let grapheme = smol_str::SmolStr::from(grapheme);
291
292        for span in spans.iter() {
293            // We’ve found the span that contains the current grapheme, so we add the grapheme to
294            // the output and go to the next grapheme.
295            if span.range.contains(&idx) {
296                output.push(StyledGrapheme {
297                    grapheme,
298                    style: styles[&span.group].resolve(theme.default_style()),
299                });
300                continue 'graphemes;
301            }
302        }
303
304        // At this point the grapheme has not been found in any of the spans outputted by the
305        // highlighter, meaning that it has not been styled. This means we should give it the
306        // theme’s default style.
307        output.push(StyledGrapheme {
308            grapheme,
309            style: theme.default_style(),
310        });
311    }
312
313    output
314}