topiary_core/lib.rs
1//! A general code formatter that relies on
2//! [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) for language
3//! parsing.
4//!
5//! In order for a language to be supported, there must be a [Tree-sitter
6//! grammar](https://tree-sitter.github.io/tree-sitter/#available-parsers)
7//! available, and there must be a query file that dictates how that language is
8//! to be formatted. We include query files for some languages.
9//!
10//! More details can be found on
11//! [GitHub](https://github.com/tweag/topiary).
12
13use std::io;
14
15use pretty_assertions::StrComparison;
16use tree_sitter::Position;
17
18pub use crate::{
19 error::{FormatterError, IoError},
20 language::Language,
21 tree_sitter::{
22 CoverageData, SyntaxNode, TopiaryQuery, Visualisation, apply_query, check_query_coverage,
23 parse,
24 },
25};
26
27mod atom_collection;
28mod error;
29mod graphviz;
30mod language;
31mod pretty;
32mod tree_sitter;
33
34#[doc(hidden)]
35pub mod test_utils;
36
37#[derive(Clone, Debug, Eq, PartialEq)]
38pub struct ScopeInformation {
39 line_number: u32,
40 scope_id: String,
41}
42
43#[derive(Clone, Debug, Default, Eq, PartialEq)]
44pub enum Capitalisation {
45 UpperCase,
46 LowerCase,
47 #[default]
48 Pass,
49}
50/// An atom represents a small piece of the output. We turn Tree-sitter nodes
51/// into atoms, and we add white-space atoms where appropriate. The final list
52/// of atoms is rendered to the output.
53#[derive(Clone, Debug, Default, Eq, PartialEq)]
54pub enum Atom {
55 /// We don't allow consecutive `Hardline`, but a `Blankline` will render two
56 /// newlines to produce a blank line.
57 Blankline,
58 /// A "no-op" atom that will not produce any output.
59 #[default]
60 Empty,
61 /// Represents a newline.
62 Hardline,
63 /// Signals the end of an indentation block.
64 IndentEnd,
65 /// Signals the start of an indentation block. Any lines between the
66 /// beginning and the end will be indented. In single-line constructs where
67 /// the beginning and the end occurs on the same line, there will be no
68 /// indentation.
69 IndentStart,
70 /// Represents the contents of a named Tree-sitter node. We track the node id here
71 /// as well.
72 Leaf {
73 content: String,
74 id: usize,
75 original_position: Position,
76 // marks the leaf to be printed on a single line, with no indentation
77 single_line_no_indent: bool,
78 // if the leaf is multi-line, each line will be indented, not just the first
79 multi_line_indent_all: bool,
80 // don't trim trailing newline characters if set to true
81 keep_whitespace: bool,
82 capitalisation: Capitalisation,
83 },
84 /// Represents a literal string, such as a semicolon.
85 Literal(String),
86 /// Represents a softline. It will be turned into a hardline for multi-line
87 /// constructs, and either a space or nothing for single-line constructs.
88 Softline {
89 spaced: bool,
90 },
91 /// Represents a space. Consecutive spaces are reduced to one before rendering.
92 Space,
93 /// Represents the destruction of errant spaces. Adjacent consecutive spaces are
94 /// reduced to zero before rendering.
95 Antispace,
96 /// Represents a segment to be deleted.
97 // It is a segment, because if one wants to delete a node,
98 // it might happen that it contains several leaves.
99 DeleteBegin,
100 DeleteEnd,
101
102 CaseBegin(Capitalisation),
103 CaseEnd,
104 /// Indicates the beginning of a scope, use in combination with the
105 /// ScopedSoftlines and ScopedConditionals below.
106 ScopeBegin(ScopeInformation),
107 /// Indicates the end of a scope, use in combination with the
108 /// ScopedSoftlines and ScopedConditionals below.
109 ScopeEnd(ScopeInformation),
110 // Indicates the beginning of a *measuring* scope, that must be related to a "normal" one.
111 // Used in combination with ScopedSoftlines and ScopedConditionals below.
112 MeasuringScopeBegin(ScopeInformation),
113 // Indicates the end of a *measuring* scope, that must be related to a "normal" one.
114 // Used in combination with ScopedSoftlines and ScopedConditionals below.
115 MeasuringScopeEnd(ScopeInformation),
116 /// Scoped commands
117 // ScopedSoftline works together with the @{prepend,append}_begin[_measuring]_scope and
118 // @{prepend,append}_end[_measuring]_scope query tags. To decide if a scoped softline
119 // must be expanded into a hardline, we look at the innermost scope having
120 // the corresponding `scope_id`, that encompasses it. We expand the softline
121 // if that scope is multi-line.
122 // If that scope contains a *measuring* scope with the same `scope_id`, we expand
123 // the node iff that *measuring* scope is multi-line.
124 // The `id` value is here for technical reasons,
125 // it allows tracking of the atom during post-processing.
126 ScopedSoftline {
127 id: usize,
128 scope_id: String,
129 spaced: bool,
130 },
131 /// Represents an atom that must only be output if the associated scope
132 /// (or its associated measuring scope, see above) meets the condition
133 /// (single-line or multi-line).
134 ScopedConditional {
135 id: usize,
136 scope_id: String,
137 condition: ScopeCondition,
138 atom: Box<Atom>,
139 },
140}
141
142impl Atom {
143 /// This function is only expected to take spaces and newlines as argument.
144 /// It defines the order Blankline > Hardline > Space > Empty.
145 pub(crate) fn dominates(&self, other: &Atom) -> bool {
146 match self {
147 Atom::Empty => false,
148 Atom::Space => matches!(other, Atom::Empty),
149 Atom::Hardline => matches!(other, Atom::Space | Atom::Empty),
150 Atom::Blankline => matches!(other, Atom::Hardline | Atom::Space | Atom::Empty),
151 _ => panic!("Unexpected character in is_dominant"),
152 }
153 }
154}
155
156/// Used in `Atom::ScopedConditional` to apply the containing Atoms only if
157/// the matched node spans a single line or multiple lines
158#[derive(Clone, Copy, Debug, Eq, PartialEq)]
159pub enum ScopeCondition {
160 /// The Atom is only applied if the matching node spans exactly one line
161 SingleLineOnly,
162 /// The Atom is only applied if the matching node spans two or more lines
163 MultiLineOnly,
164}
165
166/// A convenience wrapper around `std::result::Result<T, FormatterError>`.
167pub type FormatterResult<T> = std::result::Result<T, FormatterError>;
168
169/// Operations that can be performed by the formatter.
170#[derive(Clone, Copy, Debug)]
171pub enum Operation {
172 /// Formatting is the default operation of the formatter, it applies the
173 /// formatting rules defined in the query file and outputs the result
174 Format {
175 /// If true, skips the idempotence check (where we format twice,
176 /// succeeding only if the intermediate and final result are identical)
177 skip_idempotence: bool,
178 /// If true, Topiary will consider an ERROR as it does a leaf node,
179 /// and continues formatting instead of exiting with an error
180 tolerate_parsing_errors: bool,
181 },
182 /// Visualises the parsed file's tree-sitter tree
183 Visualise {
184 /// Choose the type of visualation Topiary should output
185 output_format: Visualisation,
186 },
187}
188
189/// The function that takes an input and formats, or visualises an output.
190///
191/// # Errors
192///
193/// If formatting fails for any reason, a `FormatterError` will be returned.
194///
195/// # Examples
196///
197/// ```
198/// # tokio_test::block_on(async {
199/// use std::fs::File;
200/// use std::io::{BufReader, Read};
201/// use topiary_core::{formatter, Language, FormatterError, TopiaryQuery, Operation};
202///
203/// let input = "[1,2]".to_string();
204/// let mut input = input.as_bytes();
205/// let mut output = Vec::new();
206/// let json = topiary_tree_sitter_facade::Language::from(tree_sitter_json::LANGUAGE);
207///
208/// let mut query_file = BufReader::new(File::open("../topiary-queries/queries/json.scm").expect("query file"));
209/// let mut query_content = String::new();
210/// query_file.read_to_string(&mut query_content).expect("read query file");
211///
212/// let language: Language = Language {
213/// name: "json".to_owned(),
214/// query: TopiaryQuery::new(&json.clone().into(), &query_content).unwrap(),
215/// grammar: json.into(),
216/// indent: None,
217/// };
218///
219/// match formatter(&mut input, &mut output, &language, Operation::Format{ skip_idempotence: false, tolerate_parsing_errors: false }) {
220/// Ok(()) => {
221/// let formatted = String::from_utf8(output).expect("valid utf-8");
222/// }
223/// Err(FormatterError::Query(message, _)) => {
224/// panic!("Error in query file: {message}");
225/// }
226/// Err(_) => {
227/// panic!("An error occurred");
228/// }
229/// }
230/// # }) // end tokio_test
231/// ```
232pub fn formatter(
233 input: &mut impl io::Read,
234 output: &mut impl io::Write,
235 language: &Language,
236 operation: Operation,
237) -> FormatterResult<()> {
238 let content = read_input(input).map_err(|e| {
239 FormatterError::Io(IoError::Filesystem(
240 "Failed to read input contents".into(),
241 e,
242 ))
243 })?;
244
245 formatter_str(&content, output, language, operation)
246}
247
248/// The function that takes a string slice and formats, or visualises an output.
249///
250/// # Errors
251///
252/// If formatting fails for any reason, a `FormatterError` will be returned.
253pub fn formatter_str(
254 input: &str,
255 output: &mut impl io::Write,
256 language: &Language,
257 operation: Operation,
258) -> FormatterResult<()> {
259 let tolerate_parsing_errors = match operation {
260 Operation::Format {
261 tolerate_parsing_errors,
262 ..
263 } => tolerate_parsing_errors,
264 _ => false,
265 };
266
267 let tree = tree_sitter::parse(input, &language.grammar, tolerate_parsing_errors)?;
268
269 formatter_tree(tree, input, output, language, operation)?;
270
271 Ok(())
272}
273
274/// The function that takes a tree and formats, or visualises an output.
275///
276/// # Errors
277///
278/// If formatting fails for any reason, a `FormatterError` will be returned.
279pub fn formatter_tree(
280 tree: topiary_tree_sitter_facade::Tree,
281 input_content: &str,
282 output: &mut impl io::Write,
283 language: &Language,
284 operation: Operation,
285) -> FormatterResult<()> {
286 match operation {
287 Operation::Format {
288 skip_idempotence,
289 tolerate_parsing_errors,
290 } => {
291 // All the work related to tree-sitter and the query is done here
292 log::debug!("Apply Tree-sitter query");
293
294 let mut atoms = tree_sitter::apply_query_tree(tree, input_content, &language.query)?;
295
296 // Various post-processing of whitespace
297 atoms.post_process();
298
299 // Pretty-print atoms
300 log::debug!("Pretty-print output");
301 let rendered = pretty::render(
302 &atoms[..],
303 // Default to " " if the language has no indentation specified
304 language.indent.as_ref().map_or(" ", |v| v.as_str()),
305 )?;
306
307 // Add a final line break if missing
308 let rendered = format!("{}\n", rendered.trim());
309
310 if !skip_idempotence {
311 idempotence_check(&rendered, language, tolerate_parsing_errors)?;
312 }
313
314 write!(output, "{rendered}")?;
315 }
316
317 Operation::Visualise { output_format } => {
318 let root: SyntaxNode = tree.root_node().into();
319
320 match output_format {
321 Visualisation::GraphViz => graphviz::write(output, &root)?,
322 Visualisation::Json => serde_json::to_writer(output, &root)?,
323 };
324 }
325 };
326 Ok(())
327}
328
329/// Simple helper function to read the full content of an io Read stream
330fn read_input(input: &mut dyn io::Read) -> Result<String, io::Error> {
331 let mut content = String::new();
332 input.read_to_string(&mut content)?;
333 Ok(content)
334}
335
336/// Perform the idempotence check. Given the already formatted content of the
337/// file, formats the content again and checks if the two are identical.
338/// Result in: `Ok(())`` if the idempotence check succeeded (the content is
339/// identical to the formatted content)
340///
341/// # Errors
342///
343/// `Err(FormatterError::Idempotence)` if the idempotence check failed
344/// `Err(FormatterError::Formatting(...))` if the formatting failed
345fn idempotence_check(
346 content: &str,
347 language: &Language,
348 tolerate_parsing_errors: bool,
349) -> FormatterResult<()> {
350 log::info!("Checking for idempotence ...");
351
352 let mut input = content.as_bytes();
353 let mut output = io::BufWriter::new(Vec::new());
354
355 match formatter(
356 &mut input,
357 &mut output,
358 language,
359 Operation::Format {
360 skip_idempotence: true,
361 tolerate_parsing_errors,
362 },
363 ) {
364 Ok(()) => {
365 let reformatted = String::from_utf8(output.into_inner()?)?;
366
367 if content == reformatted {
368 Ok(())
369 } else {
370 log::error!("Failed idempotence check");
371 log::error!("{}", StrComparison::new(content, &reformatted));
372 Err(FormatterError::Idempotence)
373 }
374 }
375 Err(error @ FormatterError::Parsing { .. }) => {
376 Err(FormatterError::IdempotenceParsing(Box::new(error)))
377 }
378 Err(error) => Err(error),
379 }
380}
381
382#[cfg(test)]
383mod tests {
384 use std::fs;
385
386 use test_log::test;
387
388 use crate::{
389 Language, Operation, TopiaryQuery, error::FormatterError, formatter,
390 test_utils::pretty_assert_eq,
391 };
392
393 /// Attempt to parse invalid json, expecting a failure
394 #[test(tokio::test)]
395 async fn parsing_error_fails_formatting() {
396 let mut input = r#"{"foo":{"bar"}}"#.as_bytes();
397 let mut output = Vec::new();
398 let query_content = "(#language! json)";
399 let grammar = topiary_tree_sitter_facade::Language::from(tree_sitter_json::LANGUAGE);
400 let language = Language {
401 name: "json".to_owned(),
402 query: TopiaryQuery::new(&grammar, query_content).unwrap(),
403 grammar,
404 indent: None,
405 };
406
407 match formatter(
408 &mut input,
409 &mut output,
410 &language,
411 Operation::Format {
412 skip_idempotence: true,
413 tolerate_parsing_errors: false,
414 },
415 ) {
416 // start end == 1
417 Err(FormatterError::Parsing(node))
418 if node.start_point().row() == 0 && node.end_point().row() == 0 => {}
419 result => {
420 panic!("Expected a parsing error on line 1, but got {result:?}");
421 }
422 }
423 }
424
425 #[test(tokio::test)]
426 async fn tolerate_parsing_errors() {
427 // Contains the invalid object {"bar" "baz"}. It should be left untouched.
428 let mut input = "{\"one\":{\"bar\" \"baz\"},\"two\":\"bar\"}".as_bytes();
429 let expected = "{ \"one\": {\"bar\" \"baz\"}, \"two\": \"bar\" }\n";
430
431 let mut output = Vec::new();
432 let query_content = fs::read_to_string("../topiary-queries/queries/json.scm").unwrap();
433 let grammar = tree_sitter_json::LANGUAGE.into();
434 let language = Language {
435 name: "json".to_owned(),
436 query: TopiaryQuery::new(&grammar, &query_content).unwrap(),
437 grammar,
438 indent: None,
439 };
440
441 formatter(
442 &mut input,
443 &mut output,
444 &language,
445 Operation::Format {
446 skip_idempotence: true,
447 tolerate_parsing_errors: true,
448 },
449 )
450 .unwrap();
451
452 let formatted = String::from_utf8(output).unwrap();
453 log::debug!("{formatted}");
454
455 pretty_assert_eq(expected, &formatted);
456 }
457}