topiary_core/
lib.rs

1//! A general code formatter that relies on
2//! [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) for language
3//! parsing.
4//!
5//! In order for a language to be supported, there must be a [Tree-sitter
6//! grammar](https://tree-sitter.github.io/tree-sitter/#available-parsers)
7//! available, and there must be a query file that dictates how that language is
8//! to be formatted. We include query files for some languages.
9//!
10//! More details can be found on
11//! [GitHub](https://github.com/tweag/topiary).
12
13use std::io;
14
15use pretty_assertions::StrComparison;
16use tree_sitter::Position;
17
18pub use crate::{
19    error::{FormatterError, IoError},
20    language::Language,
21    tree_sitter::{apply_query, CoverageData, SyntaxNode, TopiaryQuery, Visualisation},
22};
23
24mod atom_collection;
25mod error;
26mod graphviz;
27mod language;
28mod pretty;
29mod tree_sitter;
30
31#[doc(hidden)]
32pub mod test_utils;
33
/// Payload carried by the scope delimiter atoms (`Atom::ScopeBegin`,
/// `Atom::ScopeEnd` and their *measuring* counterparts).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScopeInformation {
    // NOTE(review): presumably the source line on which the scope delimiter
    // sits; confirm against the code that builds these values.
    line_number: u32,
    // Name of the scope; scoped atoms refer to a scope through the same
    // `scope_id` string.
    scope_id: String,
}
39
/// Letter-case treatment that can be attached to atoms (see `Atom::Leaf` and
/// `Atom::CaseBegin`). The default is `Pass`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum Capitalisation {
    /// Upper-case mode.
    UpperCase,
    /// Lower-case mode.
    LowerCase,
    /// The default mode.
    // NOTE(review): presumably leaves the original casing untouched; confirm
    // against the rendering code.
    #[default]
    Pass,
}
47/// An atom represents a small piece of the output. We turn Tree-sitter nodes
48/// into atoms, and we add white-space atoms where appropriate. The final list
49/// of atoms is rendered to the output.
50#[derive(Clone, Debug, Default, Eq, PartialEq)]
51pub enum Atom {
52    /// We don't allow consecutive `Hardline`, but a `Blankline` will render two
53    /// newlines to produce a blank line.
54    Blankline,
55    /// A "no-op" atom that will not produce any output.
56    #[default]
57    Empty,
58    /// Represents a newline.
59    Hardline,
60    /// Signals the end of an indentation block.
61    IndentEnd,
62    /// Signals the start of an indentation block. Any lines between the
63    /// beginning and the end will be indented. In single-line constructs where
64    /// the beginning and the end occurs on the same line, there will be no
65    /// indentation.
66    IndentStart,
67    /// Represents the contents of a named Tree-sitter node. We track the node id here
68    /// as well.
69    Leaf {
70        content: String,
71        id: usize,
72        original_position: Position,
73        // marks the leaf to be printed on a single line, with no indentation
74        single_line_no_indent: bool,
75        // if the leaf is multi-line, each line will be indented, not just the first
76        multi_line_indent_all: bool,
77        capitalisation: Capitalisation,
78    },
79    /// Represents a literal string, such as a semicolon.
80    Literal(String),
81    /// Represents a softline. It will be turned into a hardline for multi-line
82    /// constructs, and either a space or nothing for single-line constructs.
83    Softline {
84        spaced: bool,
85    },
86    /// Represents a space. Consecutive spaces are reduced to one before rendering.
87    Space,
88    /// Represents the destruction of errant spaces. Adjacent consecutive spaces are
89    /// reduced to zero before rendering.
90    Antispace,
91    /// Represents a segment to be deleted.
92    // It is a segment, because if one wants to delete a node,
93    // it might happen that it contains several leaves.
94    DeleteBegin,
95    DeleteEnd,
96
97    CaseBegin(Capitalisation),
98    CaseEnd,
99    /// Indicates the beginning of a scope, use in combination with the
100    /// ScopedSoftlines and ScopedConditionals below.
101    ScopeBegin(ScopeInformation),
102    /// Indicates the end of a scope, use in combination with the
103    /// ScopedSoftlines and ScopedConditionals below.
104    ScopeEnd(ScopeInformation),
105    // Indicates the beginning of a *measuring* scope, that must be related to a "normal" one.
106    // Used in combination with ScopedSoftlines and ScopedConditionals below.
107    MeasuringScopeBegin(ScopeInformation),
108    // Indicates the end of a *measuring* scope, that must be related to a "normal" one.
109    // Used in combination with ScopedSoftlines and ScopedConditionals below.
110    MeasuringScopeEnd(ScopeInformation),
111    /// Scoped commands
112    // ScopedSoftline works together with the @{prepend,append}_begin[_measuring]_scope and
113    // @{prepend,append}_end[_measuring]_scope query tags. To decide if a scoped softline
114    // must be expanded into a hardline, we look at the innermost scope having
115    // the corresponding `scope_id`, that encompasses it. We expand the softline
116    // if that scope is multi-line.
117    // If that scope contains a *measuring* scope with the same `scope_id`, we expand
118    // the node iff that *measuring* scope is multi-line.
119    // The `id` value is here for technical reasons,
120    // it allows tracking of the atom during post-processing.
121    ScopedSoftline {
122        id: usize,
123        scope_id: String,
124        spaced: bool,
125    },
126    /// Represents an atom that must only be output if the associated scope
127    /// (or its associated measuring scope, see above) meets the condition
128    /// (single-line or multi-line).
129    ScopedConditional {
130        id: usize,
131        scope_id: String,
132        condition: ScopeCondition,
133        atom: Box<Atom>,
134    },
135}
136
137impl Atom {
138    /// This function is only expected to take spaces and newlines as argument.
139    /// It defines the order Blankline > Hardline > Space > Empty.
140    pub(crate) fn dominates(&self, other: &Atom) -> bool {
141        match self {
142            Atom::Empty => false,
143            Atom::Space => matches!(other, Atom::Empty),
144            Atom::Hardline => matches!(other, Atom::Space | Atom::Empty),
145            Atom::Blankline => matches!(other, Atom::Hardline | Atom::Space | Atom::Empty),
146            _ => panic!("Unexpected character in is_dominant"),
147        }
148    }
149}
150
/// Condition attached to an `Atom::ScopedConditional`: the wrapped atom is
/// applied only if the matched node spans a single line, or only if it spans
/// multiple lines.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScopeCondition {
    /// Apply the atom only if the matching node spans exactly one line.
    SingleLineOnly,
    /// Apply the atom only if the matching node spans two or more lines.
    MultiLineOnly,
}
160
161/// A convenience wrapper around `std::result::Result<T, FormatterError>`.
162pub type FormatterResult<T> = std::result::Result<T, FormatterError>;
163
164/// Operations that can be performed by the formatter.
165#[derive(Clone, Copy, Debug)]
166pub enum Operation {
167    /// Formatting is the default operation of the formatter, it applies the
168    /// formatting rules defined in the query file and outputs the result
169    Format {
170        /// If true, skips the idempotence check (where we format twice,
171        /// succeeding only if the intermediate and final result are identical)
172        skip_idempotence: bool,
173        /// If true, Topiary will consider an ERROR as it does a leaf node,
174        /// and continues formatting instead of exiting with an error
175        tolerate_parsing_errors: bool,
176    },
177    /// Visualises the parsed file's tree-sitter tree
178    Visualise {
179        /// Choose the type of visualation Topiary should output
180        output_format: Visualisation,
181    },
182}
183
184/// The function that takes an input and formats, or visualises an output.
185///
186/// # Errors
187///
188/// If formatting fails for any reason, a `FormatterError` will be returned.
189///
190/// # Examples
191///
192/// ```
193/// # tokio_test::block_on(async {
194/// use std::fs::File;
195/// use std::io::{BufReader, Read};
196/// use topiary_core::{formatter, Language, FormatterError, TopiaryQuery, Operation};
197///
198/// let input = "[1,2]".to_string();
199/// let mut input = input.as_bytes();
200/// let mut output = Vec::new();
201/// let json = topiary_tree_sitter_facade::Language::from(tree_sitter_json::LANGUAGE);
202///
203/// let mut query_file = BufReader::new(File::open("../topiary-queries/queries/json.scm").expect("query file"));
204/// let mut query_content = String::new();
205/// query_file.read_to_string(&mut query_content).expect("read query file");
206///
207/// let language: Language = Language {
208///     name: "json".to_owned(),
209///     query: TopiaryQuery::new(&json.clone().into(), &query_content).unwrap(),
210///     grammar: json.into(),
211///     indent: None,
212/// };
213///
214/// match formatter(&mut input, &mut output, &language, Operation::Format{ skip_idempotence: false, tolerate_parsing_errors: false }) {
215///   Ok(()) => {
216///     let formatted = String::from_utf8(output).expect("valid utf-8");
217///   }
218///   Err(FormatterError::Query(message, _)) => {
219///     panic!("Error in query file: {message}");
220///   }
221///   Err(_) => {
222///     panic!("An error occurred");
223///   }
224/// }
225/// # }) // end tokio_test
226/// ```
227pub fn formatter(
228    input: &mut impl io::Read,
229    output: &mut impl io::Write,
230    language: &Language,
231    operation: Operation,
232) -> FormatterResult<()> {
233    let content = read_input(input).map_err(|e| {
234        FormatterError::Io(IoError::Filesystem(
235            "Failed to read input contents".into(),
236            e,
237        ))
238    })?;
239
240    match operation {
241        Operation::Format {
242            skip_idempotence,
243            tolerate_parsing_errors,
244        } => {
245            // All the work related to tree-sitter and the query is done here
246            log::info!("Apply Tree-sitter query");
247
248            let mut atoms = tree_sitter::apply_query(
249                &content,
250                &language.query,
251                &language.grammar,
252                tolerate_parsing_errors,
253            )?;
254
255            // Various post-processing of whitespace
256            atoms.post_process();
257
258            // Pretty-print atoms
259            log::info!("Pretty-print output");
260            let rendered = pretty::render(
261                &atoms[..],
262                // Default to "  " if the language has no indentation specified
263                language.indent.as_ref().map_or("  ", |v| v.as_str()),
264            )?;
265
266            // Add a final line break if missing
267            let rendered = format!("{}\n", rendered.trim());
268
269            if !skip_idempotence {
270                idempotence_check(&rendered, language, tolerate_parsing_errors)?;
271            }
272
273            write!(output, "{rendered}")?;
274        }
275
276        Operation::Visualise { output_format } => {
277            let tree = tree_sitter::parse(&content, &language.grammar, false)?;
278            let root: SyntaxNode = tree.root_node().into();
279
280            match output_format {
281                Visualisation::GraphViz => graphviz::write(output, &root)?,
282                Visualisation::Json => serde_json::to_writer(output, &root)?,
283            };
284        }
285    };
286
287    Ok(())
288}
289
290pub fn coverage(
291    input: &mut impl io::Read,
292    output: &mut impl io::Write,
293    language: &Language,
294) -> FormatterResult<()> {
295    let content = read_input(input).map_err(|e| {
296        FormatterError::Io(IoError::Filesystem(
297            "Failed to read input contents".into(),
298            e,
299        ))
300    })?;
301
302    let res = tree_sitter::check_query_coverage(&content, &language.query, &language.grammar)?;
303
304    let queries_string = if res.missing_patterns.is_empty() {
305        if res.cover_percentage == 0.0 {
306            "No queries found".into()
307        } else {
308            "All queries are matched".into()
309        }
310    } else {
311        format!(
312            "Unmatched queries:\n{}",
313            &res.missing_patterns[..].join("\n"),
314        )
315    };
316
317    write!(
318        output,
319        "Query coverage: {:.2}%\n{}\n",
320        res.cover_percentage * 100.0,
321        queries_string,
322    )?;
323
324    if res.cover_percentage == 1.0 {
325        Ok(())
326    } else {
327        Err(FormatterError::PatternDoesNotMatch)
328    }
329}
330
/// Drains `input` to its end and returns everything that was read as one
/// `String`. Any error reported by the reader is passed on unchanged.
fn read_input(input: &mut dyn io::Read) -> Result<String, io::Error> {
    let mut buffer = String::new();
    // The byte count is of no interest; hand back the filled buffer instead.
    input.read_to_string(&mut buffer).map(|_| buffer)
}
337
338/// Perform the idempotence check. Given the already formatted content of the
339/// file, formats the content again and checks if the two are identical.
340/// Result in: `Ok(())`` if the idempotence check succeeded (the content is
341/// identical to the formatted content)
342///
343/// # Errors
344///
345/// `Err(FormatterError::Idempotence)` if the idempotence check failed
346/// `Err(FormatterError::Formatting(...))` if the formatting failed
347fn idempotence_check(
348    content: &str,
349    language: &Language,
350    tolerate_parsing_errors: bool,
351) -> FormatterResult<()> {
352    log::info!("Checking for idempotence ...");
353
354    let mut input = content.as_bytes();
355    let mut output = io::BufWriter::new(Vec::new());
356
357    match formatter(
358        &mut input,
359        &mut output,
360        language,
361        Operation::Format {
362            skip_idempotence: true,
363            tolerate_parsing_errors,
364        },
365    ) {
366        Ok(()) => {
367            let reformatted = String::from_utf8(output.into_inner()?)?;
368
369            if content == reformatted {
370                Ok(())
371            } else {
372                log::error!("Failed idempotence check");
373                log::error!("{}", StrComparison::new(content, &reformatted));
374                Err(FormatterError::Idempotence)
375            }
376        }
377        Err(error @ FormatterError::Parsing { .. }) => {
378            Err(FormatterError::IdempotenceParsing(Box::new(error)))
379        }
380        Err(error) => Err(error),
381    }
382}
383
#[cfg(test)]
mod tests {
    use std::fs;

    use test_log::test;

    use crate::{
        error::FormatterError, formatter, test_utils::pretty_assert_eq, Language, Operation,
        TopiaryQuery,
    };

    /// Builds the JSON `Language` shared by the tests below from the given
    /// query source, using the default indentation.
    fn json_language(query_content: &str) -> Language {
        let grammar = topiary_tree_sitter_facade::Language::from(tree_sitter_json::LANGUAGE);

        Language {
            name: "json".to_owned(),
            query: TopiaryQuery::new(&grammar, query_content).unwrap(),
            grammar,
            indent: None,
        }
    }

    /// Formatting invalid JSON without error tolerance must fail with a
    /// parsing error located on line 1.
    #[test(tokio::test)]
    async fn parsing_error_fails_formatting() {
        let mut input = r#"{"foo":{"bar"}}"#.as_bytes();
        let mut output = Vec::new();
        let language = json_language("(#language! json)");

        let result = formatter(
            &mut input,
            &mut output,
            &language,
            Operation::Format {
                skip_idempotence: true,
                tolerate_parsing_errors: false,
            },
        );

        assert!(
            matches!(
                result,
                Err(FormatterError::Parsing {
                    start_line: 1,
                    end_line: 1,
                    ..
                })
            ),
            "Expected a parsing error on line 1, but got {result:?}"
        );
    }

    /// With error tolerance enabled, the invalid part of the input is kept
    /// verbatim while the rest is formatted.
    #[test(tokio::test)]
    async fn tolerate_parsing_errors() {
        // Contains the invalid object {"bar"   "baz"}. It should be left untouched.
        let mut input = "{\"one\":{\"bar\"   \"baz\"},\"two\":\"bar\"}".as_bytes();
        let expected = "{ \"one\": {\"bar\"   \"baz\"}, \"two\": \"bar\" }\n";

        let query_content = fs::read_to_string("../topiary-queries/queries/json.scm").unwrap();
        let language = json_language(&query_content);

        let mut output = Vec::new();
        let operation = Operation::Format {
            skip_idempotence: true,
            tolerate_parsing_errors: true,
        };
        formatter(&mut input, &mut output, &language, operation).unwrap();

        let formatted = String::from_utf8(output).unwrap();
        log::debug!("{formatted}");

        pretty_assert_eq(expected, &formatted);
    }
}