topiary_core/
lib.rs

1//! A general code formatter that relies on
2//! [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) for language
3//! parsing.
4//!
5//! In order for a language to be supported, there must be a [Tree-sitter
6//! grammar](https://tree-sitter.github.io/tree-sitter/#available-parsers)
7//! available, and there must be a query file that dictates how that language is
8//! to be formatted. We include query files for some languages.
9//!
10//! More details can be found on
11//! [GitHub](https://github.com/tweag/topiary).
12
13use std::io;
14
15use pretty_assertions::StrComparison;
16use tree_sitter::Position;
17
18pub use crate::{
19    error::{FormatterError, IoError},
20    language::Language,
21    tree_sitter::{
22        CoverageData, SyntaxNode, TopiaryQuery, Visualisation, apply_query, check_query_coverage,
23        parse,
24    },
25};
26
27mod atom_collection;
28mod error;
29mod graphviz;
30mod language;
31mod pretty;
32mod tree_sitter;
33
34#[doc(hidden)]
35pub mod test_utils;
36
/// Payload attached to the scope-delimiting atoms (`ScopeBegin`, `ScopeEnd`,
/// `MeasuringScopeBegin` and `MeasuringScopeEnd`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScopeInformation {
    /// Line number recorded for the scope boundary.
    // NOTE(review): presumably the line of the node that opens or closes the
    // scope in the original input -- confirm against the code that builds it.
    line_number: u32,
    /// Name of the scope. Scoped atoms refer to their scope through the same
    /// `scope_id`.
    scope_id: String,
}
42
/// Capitalisation mode that can be attached to a leaf or to a case segment.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum Capitalisation {
    /// Upper-case mode.
    UpperCase,
    /// Lower-case mode.
    LowerCase,
    /// The default mode.
    // NOTE(review): presumably "pass through", i.e. leave the text as it is;
    // confirm against the rendering code.
    #[default]
    Pass,
}
/// An atom represents a small piece of the output. We turn Tree-sitter nodes
/// into atoms, and we add white-space atoms where appropriate. The final list
/// of atoms is rendered to the output.
///
/// The default atom is [`Atom::Empty`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum Atom {
    /// We don't allow consecutive `Hardline`, but a `Blankline` will render two
    /// newlines to produce a blank line.
    Blankline,
    /// A "no-op" atom that will not produce any output.
    #[default]
    Empty,
    /// Represents a newline.
    Hardline,
    /// Signals the end of an indentation block.
    IndentEnd,
    /// Signals the start of an indentation block. Any lines between the
    /// beginning and the end will be indented. In single-line constructs where
    /// the beginning and the end occurs on the same line, there will be no
    /// indentation.
    IndentStart,
    /// Represents the contents of a named Tree-sitter node. We track the node id here
    /// as well.
    Leaf {
        /// The contents of the node.
        content: String,
        /// The id of the node.
        id: usize,
        /// Position recorded for the leaf.
        // NOTE(review): presumably its position in the original input, as the
        // field name suggests -- confirm where leaves are constructed.
        original_position: Position,
        /// Marks the leaf to be printed on a single line, with no indentation.
        single_line_no_indent: bool,
        /// If the leaf is multi-line, each line will be indented, not just the
        /// first.
        multi_line_indent_all: bool,
        /// Don't trim trailing newline characters if set to true.
        keep_whitespace: bool,
        /// Capitalisation mode attached to this leaf.
        capitalisation: Capitalisation,
    },
    /// Represents a literal string, such as a semicolon.
    Literal(String),
    /// Represents a softline. It will be turned into a hardline for multi-line
    /// constructs, and either a space or nothing for single-line constructs.
    Softline {
        /// Whether the single-line form is a space (`true`) or nothing
        /// (`false`).
        // NOTE(review): mapping inferred from the field name and the variant
        // description -- confirm in the post-processing code.
        spaced: bool,
    },
    /// Represents a space. Consecutive spaces are reduced to one before rendering.
    Space,
    /// Represents the destruction of errant spaces. Adjacent consecutive spaces are
    /// reduced to zero before rendering.
    Antispace,
    /// Marks the beginning of a segment to be deleted.
    ///
    /// It is a segment, because if one wants to delete a node,
    /// it might happen that it contains several leaves.
    DeleteBegin,
    /// Marks the end of a segment to be deleted (see `DeleteBegin`).
    DeleteEnd,

    /// Marks the beginning of a case segment, carrying the `Capitalisation`
    /// mode for that segment.
    // NOTE(review): how the segment is applied is not visible in this file.
    CaseBegin(Capitalisation),
    /// Marks the end of a case segment (see `CaseBegin`).
    CaseEnd,
    /// Indicates the beginning of a scope, use in combination with the
    /// ScopedSoftlines and ScopedConditionals below.
    ScopeBegin(ScopeInformation),
    /// Indicates the end of a scope, use in combination with the
    /// ScopedSoftlines and ScopedConditionals below.
    ScopeEnd(ScopeInformation),
    /// Indicates the beginning of a *measuring* scope, that must be related to
    /// a "normal" one. Used in combination with ScopedSoftlines and
    /// ScopedConditionals below.
    MeasuringScopeBegin(ScopeInformation),
    /// Indicates the end of a *measuring* scope, that must be related to a
    /// "normal" one. Used in combination with ScopedSoftlines and
    /// ScopedConditionals below.
    MeasuringScopeEnd(ScopeInformation),
    /// A softline whose expansion is decided by a named scope.
    ///
    /// ScopedSoftline works together with the
    /// `@{prepend,append}_begin[_measuring]_scope` and
    /// `@{prepend,append}_end[_measuring]_scope` query tags. To decide if a
    /// scoped softline must be expanded into a hardline, we look at the
    /// innermost scope having the corresponding `scope_id`, that encompasses
    /// it. We expand the softline if that scope is multi-line.
    ///
    /// If that scope contains a *measuring* scope with the same `scope_id`, we
    /// expand the node iff that *measuring* scope is multi-line.
    ///
    /// The `id` value is here for technical reasons, it allows tracking of the
    /// atom during post-processing.
    ScopedSoftline {
        /// Tracking id used during post-processing.
        id: usize,
        /// Name of the scope that decides whether the softline is expanded.
        scope_id: String,
        /// Same role as `spaced` on `Softline`.
        // NOTE(review): assumed from the shared field name -- confirm.
        spaced: bool,
    },
    /// Represents an atom that must only be output if the associated scope
    /// (or its associated measuring scope, see above) meets the condition
    /// (single-line or multi-line).
    ScopedConditional {
        /// Tracking id.
        // NOTE(review): presumably used like the `id` of `ScopedSoftline`.
        id: usize,
        /// Name of the scope whose shape is tested.
        scope_id: String,
        /// The condition the scope has to meet for `atom` to be output.
        condition: ScopeCondition,
        /// The atom that is output when the condition holds.
        atom: Box<Atom>,
    },
}
141
142impl Atom {
143    /// This function is only expected to take spaces and newlines as argument.
144    /// It defines the order Blankline > Hardline > Space > Empty.
145    pub(crate) fn dominates(&self, other: &Atom) -> bool {
146        match self {
147            Atom::Empty => false,
148            Atom::Space => matches!(other, Atom::Empty),
149            Atom::Hardline => matches!(other, Atom::Space | Atom::Empty),
150            Atom::Blankline => matches!(other, Atom::Hardline | Atom::Space | Atom::Empty),
151            _ => panic!("Unexpected character in is_dominant"),
152        }
153    }
154}
155
/// Condition carried by `Atom::ScopedConditional`: the wrapped atom is applied
/// only when the matched node has the required shape, i.e. it spans a single
/// line or it spans several lines.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScopeCondition {
    /// Apply the atom only if the matching node spans exactly one line.
    SingleLineOnly,
    /// Apply the atom only if the matching node spans two or more lines.
    MultiLineOnly,
}
165
166/// A convenience wrapper around `std::result::Result<T, FormatterError>`.
167pub type FormatterResult<T> = std::result::Result<T, FormatterError>;
168
/// Operations that can be performed by the formatter.
#[derive(Clone, Copy, Debug)]
pub enum Operation {
    /// Formatting is the default operation of the formatter. It applies the
    /// formatting rules defined in the query file and outputs the result.
    Format {
        /// If true, skips the idempotence check (where we format twice,
        /// succeeding only if the intermediate and final result are identical)
        skip_idempotence: bool,
        /// If true, Topiary will consider an ERROR as it does a leaf node,
        /// and continues formatting instead of exiting with an error
        tolerate_parsing_errors: bool,
    },
    /// Visualises the parsed file's Tree-sitter tree.
    Visualise {
        /// Chooses the type of visualisation Topiary should output
        output_format: Visualisation,
    },
}
188
189/// The function that takes an input and formats, or visualises an output.
190///
191/// # Errors
192///
193/// If formatting fails for any reason, a `FormatterError` will be returned.
194///
195/// # Examples
196///
197/// ```
198/// # tokio_test::block_on(async {
199/// use std::fs::File;
200/// use std::io::{BufReader, Read};
201/// use topiary_core::{formatter, Language, FormatterError, TopiaryQuery, Operation};
202///
203/// let input = "[1,2]".to_string();
204/// let mut input = input.as_bytes();
205/// let mut output = Vec::new();
206/// let json = topiary_tree_sitter_facade::Language::from(tree_sitter_json::LANGUAGE);
207///
208/// let mut query_file = BufReader::new(File::open("../topiary-queries/queries/json.scm").expect("query file"));
209/// let mut query_content = String::new();
210/// query_file.read_to_string(&mut query_content).expect("read query file");
211///
212/// let language: Language = Language {
213///     name: "json".to_owned(),
214///     query: TopiaryQuery::new(&json.clone().into(), &query_content).unwrap(),
215///     grammar: json.into(),
216///     indent: None,
217/// };
218///
219/// match formatter(&mut input, &mut output, &language, Operation::Format{ skip_idempotence: false, tolerate_parsing_errors: false }) {
220///   Ok(()) => {
221///     let formatted = String::from_utf8(output).expect("valid utf-8");
222///   }
223///   Err(FormatterError::Query(message, _)) => {
224///     panic!("Error in query file: {message}");
225///   }
226///   Err(_) => {
227///     panic!("An error occurred");
228///   }
229/// }
230/// # }) // end tokio_test
231/// ```
232pub fn formatter(
233    input: &mut impl io::Read,
234    output: &mut impl io::Write,
235    language: &Language,
236    operation: Operation,
237) -> FormatterResult<()> {
238    let content = read_input(input).map_err(|e| {
239        FormatterError::Io(IoError::Filesystem(
240            "Failed to read input contents".into(),
241            e,
242        ))
243    })?;
244
245    formatter_str(&content, output, language, operation)
246}
247
248/// The function that takes a string slice and formats, or visualises an output.
249///
250/// # Errors
251///
252/// If formatting fails for any reason, a `FormatterError` will be returned.
253pub fn formatter_str(
254    input: &str,
255    output: &mut impl io::Write,
256    language: &Language,
257    operation: Operation,
258) -> FormatterResult<()> {
259    let tolerate_parsing_errors = match operation {
260        Operation::Format {
261            tolerate_parsing_errors,
262            ..
263        } => tolerate_parsing_errors,
264        _ => false,
265    };
266
267    let tree = tree_sitter::parse(input, &language.grammar, tolerate_parsing_errors)?;
268
269    formatter_tree(tree, input, output, language, operation)?;
270
271    Ok(())
272}
273
274/// The function that takes a tree and formats, or visualises an output.
275///
276/// # Errors
277///
278/// If formatting fails for any reason, a `FormatterError` will be returned.
279pub fn formatter_tree(
280    tree: topiary_tree_sitter_facade::Tree,
281    input_content: &str,
282    output: &mut impl io::Write,
283    language: &Language,
284    operation: Operation,
285) -> FormatterResult<()> {
286    match operation {
287        Operation::Format {
288            skip_idempotence,
289            tolerate_parsing_errors,
290        } => {
291            // All the work related to tree-sitter and the query is done here
292            log::debug!("Apply Tree-sitter query");
293
294            let mut atoms = tree_sitter::apply_query_tree(tree, input_content, &language.query)?;
295
296            // Various post-processing of whitespace
297            atoms.post_process();
298
299            // Pretty-print atoms
300            log::debug!("Pretty-print output");
301            let rendered = pretty::render(
302                &atoms[..],
303                // Default to "  " if the language has no indentation specified
304                language.indent.as_ref().map_or("  ", |v| v.as_str()),
305            )?;
306
307            // Add a final line break if missing
308            let rendered = format!("{}\n", rendered.trim());
309
310            if !skip_idempotence {
311                idempotence_check(&rendered, language, tolerate_parsing_errors)?;
312            }
313
314            write!(output, "{rendered}")?;
315        }
316
317        Operation::Visualise { output_format } => {
318            let root: SyntaxNode = tree.root_node().into();
319
320            match output_format {
321                Visualisation::GraphViz => graphviz::write(output, &root)?,
322                Visualisation::Json => serde_json::to_writer(output, &root)?,
323            };
324        }
325    };
326    Ok(())
327}
328
/// Drains `input` to its end and returns everything that was read as a
/// `String`.
///
/// # Errors
///
/// Returns the `io::Error` reported by `read_to_string`, for instance when
/// the stream does not hold valid UTF-8.
fn read_input(input: &mut dyn io::Read) -> Result<String, io::Error> {
    let mut buffer = String::new();
    // The byte count reported by the read is not needed, only the text.
    input.read_to_string(&mut buffer).map(|_| buffer)
}
335
336/// Perform the idempotence check. Given the already formatted content of the
337/// file, formats the content again and checks if the two are identical.
338/// Result in: `Ok(())`` if the idempotence check succeeded (the content is
339/// identical to the formatted content)
340///
341/// # Errors
342///
343/// `Err(FormatterError::Idempotence)` if the idempotence check failed
344/// `Err(FormatterError::Formatting(...))` if the formatting failed
345fn idempotence_check(
346    content: &str,
347    language: &Language,
348    tolerate_parsing_errors: bool,
349) -> FormatterResult<()> {
350    log::info!("Checking for idempotence ...");
351
352    let mut input = content.as_bytes();
353    let mut output = io::BufWriter::new(Vec::new());
354
355    match formatter(
356        &mut input,
357        &mut output,
358        language,
359        Operation::Format {
360            skip_idempotence: true,
361            tolerate_parsing_errors,
362        },
363    ) {
364        Ok(()) => {
365            let reformatted = String::from_utf8(output.into_inner()?)?;
366
367            if content == reformatted {
368                Ok(())
369            } else {
370                log::error!("Failed idempotence check");
371                log::error!("{}", StrComparison::new(content, &reformatted));
372                Err(FormatterError::Idempotence)
373            }
374        }
375        Err(error @ FormatterError::Parsing { .. }) => {
376            Err(FormatterError::IdempotenceParsing(Box::new(error)))
377        }
378        Err(error) => Err(error),
379    }
380}
381
/// Unit tests for the public `formatter` entry point, run against the JSON
/// grammar.
#[cfg(test)]
mod tests {
    use std::fs;

    use test_log::test;

    use crate::{
        Language, Operation, TopiaryQuery, error::FormatterError, formatter,
        test_utils::pretty_assert_eq,
    };

    /// Attempt to parse invalid json, expecting a failure
    #[test(tokio::test)]
    async fn parsing_error_fails_formatting() {
        // `{"bar"}` is not a valid JSON object.
        let mut input = r#"{"foo":{"bar"}}"#.as_bytes();
        let mut output = Vec::new();
        // The query only declares the language, without any formatting rule.
        let query_content = "(#language! json)";
        let grammar = topiary_tree_sitter_facade::Language::from(tree_sitter_json::LANGUAGE);
        let language = Language {
            name: "json".to_owned(),
            query: TopiaryQuery::new(&grammar, query_content).unwrap(),
            grammar,
            indent: None,
        };

        match formatter(
            &mut input,
            &mut output,
            &language,
            Operation::Format {
                skip_idempotence: true,
                tolerate_parsing_errors: false,
            },
        ) {
            // The reported node must start and end on row 0, i.e. on line 1
            // of the input.
            Err(FormatterError::Parsing(node))
                if node.start_point().row() == 0 && node.end_point().row() == 0 => {}
            result => {
                panic!("Expected a parsing error on line 1, but got {result:?}");
            }
        }
    }

    /// Format invalid json while tolerating parsing errors, expecting the
    /// invalid part to be output as it was written
    #[test(tokio::test)]
    async fn tolerate_parsing_errors() {
        // Contains the invalid object {"bar"   "baz"}. It should be left untouched.
        let mut input = "{\"one\":{\"bar\"   \"baz\"},\"two\":\"bar\"}".as_bytes();
        let expected = "{ \"one\": {\"bar\"   \"baz\"}, \"two\": \"bar\" }\n";

        let mut output = Vec::new();
        // This test uses the JSON query file from the `topiary-queries`
        // directory, addressed by a relative path.
        let query_content = fs::read_to_string("../topiary-queries/queries/json.scm").unwrap();
        let grammar = tree_sitter_json::LANGUAGE.into();
        let language = Language {
            name: "json".to_owned(),
            query: TopiaryQuery::new(&grammar, &query_content).unwrap(),
            grammar,
            indent: None,
        };

        formatter(
            &mut input,
            &mut output,
            &language,
            Operation::Format {
                skip_idempotence: true,
                tolerate_parsing_errors: true,
            },
        )
        .unwrap();

        let formatted = String::from_utf8(output).unwrap();
        log::debug!("{formatted}");

        pretty_assert_eq(expected, &formatted);
    }
}
457}