topiary_core/lib.rs
1//! A general code formatter that relies on
2//! [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) for language
3//! parsing.
4//!
5//! In order for a language to be supported, there must be a [Tree-sitter
6//! grammar](https://tree-sitter.github.io/tree-sitter/#available-parsers)
7//! available, and there must be a query file that dictates how that language is
8//! to be formatted. We include query files for some languages.
9//!
10//! More details can be found on
11//! [GitHub](https://github.com/tweag/topiary).
12
13use std::io;
14
15use pretty_assertions::StrComparison;
16use tree_sitter::Position;
17
18pub use crate::{
19 error::{FormatterError, IoError},
20 language::Language,
21 tree_sitter::{apply_query, CoverageData, SyntaxNode, TopiaryQuery, Visualisation},
22};
23
24mod atom_collection;
25mod error;
26mod graphviz;
27mod language;
28mod pretty;
29mod tree_sitter;
30
31#[doc(hidden)]
32pub mod test_utils;
33
/// Identifies a scope by pairing a `scope_id` with a line number.
///
/// This is the payload carried by the scope-delimiting atoms
/// (`Atom::ScopeBegin`, `Atom::ScopeEnd`, `Atom::MeasuringScopeBegin` and
/// `Atom::MeasuringScopeEnd`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ScopeInformation {
    // NOTE(review): presumably the input line on which the scope boundary
    // sits -- confirm against the code that constructs this value.
    line_number: u32,
    // Name of the scope. Scoped atoms (`Atom::ScopedSoftline`,
    // `Atom::ScopedConditional`) carry a `scope_id` of their own that is
    // resolved against the scopes sharing the same identifier.
    scope_id: String,
}
39
/// Capitalisation setting carried by `Atom::Leaf` (field `capitalisation`)
/// and by `Atom::CaseBegin`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum Capitalisation {
    // NOTE(review): presumably "render the text in upper case" -- confirm
    // against the renderer / post-processing code.
    UpperCase,
    // NOTE(review): presumably "render the text in lower case" -- confirm.
    LowerCase,
    // The default variant.
    // NOTE(review): presumably "pass the text through unchanged" -- confirm.
    #[default]
    Pass,
}
/// An atom represents a small piece of the output. We turn Tree-sitter nodes
/// into atoms, and we add white-space atoms where appropriate. The final list
/// of atoms is rendered to the output.
///
/// The default atom is [`Atom::Empty`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum Atom {
    /// We don't allow consecutive `Hardline`, but a `Blankline` will render two
    /// newlines to produce a blank line.
    Blankline,
    /// A "no-op" atom that will not produce any output.
    #[default]
    Empty,
    /// Represents a newline.
    Hardline,
    /// Signals the end of an indentation block.
    IndentEnd,
    /// Signals the start of an indentation block. Any lines between the
    /// beginning and the end will be indented. In single-line constructs where
    /// the beginning and the end occur on the same line, there will be no
    /// indentation.
    IndentStart,
    /// Represents the contents of a named Tree-sitter node. We track the node id here
    /// as well.
    Leaf {
        /// The contents of the node.
        content: String,
        /// The id of the Tree-sitter node this leaf was created from.
        id: usize,
        // NOTE(review): presumably the node's position in the original
        // input -- confirm against the code that builds leaves.
        original_position: Position,
        /// Marks the leaf to be printed on a single line, with no indentation.
        single_line_no_indent: bool,
        /// If the leaf is multi-line, each line will be indented, not just the first.
        multi_line_indent_all: bool,
        /// Capitalisation setting for this leaf.
        capitalisation: Capitalisation,
    },
    /// Represents a literal string, such as a semicolon.
    Literal(String),
    /// Represents a softline. It will be turned into a hardline for multi-line
    /// constructs, and either a space or nothing for single-line constructs.
    Softline {
        // NOTE(review): presumably selects "space" (true) versus "nothing"
        // (false) in the single-line case -- confirm.
        spaced: bool,
    },
    /// Represents a space. Consecutive spaces are reduced to one before rendering.
    Space,
    /// Represents the destruction of errant spaces. Adjacent consecutive spaces are
    /// reduced to zero before rendering.
    Antispace,
    /// Represents the beginning of a segment to be deleted.
    // It is a segment, because if one wants to delete a node,
    // it might happen that it contains several leaves.
    DeleteBegin,
    /// Represents the end of a segment to be deleted (see `DeleteBegin`).
    DeleteEnd,

    // NOTE(review): presumably opens a segment to which the given
    // capitalisation applies, closed by `CaseEnd` -- confirm.
    CaseBegin(Capitalisation),
    // NOTE(review): presumably closes the segment opened by `CaseBegin` -- confirm.
    CaseEnd,
    /// Indicates the beginning of a scope, used in combination with the
    /// ScopedSoftlines and ScopedConditionals below.
    ScopeBegin(ScopeInformation),
    /// Indicates the end of a scope, used in combination with the
    /// ScopedSoftlines and ScopedConditionals below.
    ScopeEnd(ScopeInformation),
    /// Indicates the beginning of a *measuring* scope, that must be related to a "normal" one.
    /// Used in combination with ScopedSoftlines and ScopedConditionals below.
    MeasuringScopeBegin(ScopeInformation),
    /// Indicates the end of a *measuring* scope, that must be related to a "normal" one.
    /// Used in combination with ScopedSoftlines and ScopedConditionals below.
    MeasuringScopeEnd(ScopeInformation),
    /// A softline whose expansion is decided by a scope rather than by its
    /// immediate surroundings.
    // ScopedSoftline works together with the @{prepend,append}_begin[_measuring]_scope and
    // @{prepend,append}_end[_measuring]_scope query tags. To decide if a scoped softline
    // must be expanded into a hardline, we look at the innermost scope having
    // the corresponding `scope_id`, that encompasses it. We expand the softline
    // if that scope is multi-line.
    // If that scope contains a *measuring* scope with the same `scope_id`, we expand
    // the node iff that *measuring* scope is multi-line.
    // The `id` value is here for technical reasons,
    // it allows tracking of the atom during post-processing.
    ScopedSoftline {
        id: usize,
        scope_id: String,
        spaced: bool,
    },
    /// Represents an atom that must only be output if the associated scope
    /// (or its associated measuring scope, see above) meets the condition
    /// (single-line or multi-line).
    ScopedConditional {
        id: usize,
        scope_id: String,
        condition: ScopeCondition,
        atom: Box<Atom>,
    },
}
136
137impl Atom {
138 /// This function is only expected to take spaces and newlines as argument.
139 /// It defines the order Blankline > Hardline > Space > Empty.
140 pub(crate) fn dominates(&self, other: &Atom) -> bool {
141 match self {
142 Atom::Empty => false,
143 Atom::Space => matches!(other, Atom::Empty),
144 Atom::Hardline => matches!(other, Atom::Space | Atom::Empty),
145 Atom::Blankline => matches!(other, Atom::Hardline | Atom::Space | Atom::Empty),
146 _ => panic!("Unexpected character in is_dominant"),
147 }
148 }
149}
150
/// Used in `Atom::ScopedConditional` to apply the contained atom only if
/// the matched node spans a single line or multiple lines.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ScopeCondition {
    /// The atom is only applied if the matching node spans exactly one line.
    SingleLineOnly,
    /// The atom is only applied if the matching node spans two or more lines.
    MultiLineOnly,
}
160
/// A convenience alias for `std::result::Result<T, FormatterError>`, the
/// result type returned by the fallible functions of this crate.
pub type FormatterResult<T> = std::result::Result<T, FormatterError>;
163
/// Operations that can be performed by the formatter.
#[derive(Clone, Copy, Debug)]
pub enum Operation {
    /// Formatting is the default operation of the formatter. It applies the
    /// formatting rules defined in the query file and outputs the result.
    Format {
        /// If true, skips the idempotence check (where we format twice,
        /// succeeding only if the intermediate and final result are identical)
        skip_idempotence: bool,
        /// If true, Topiary will consider an ERROR as it does a leaf node,
        /// and continues formatting instead of exiting with an error
        tolerate_parsing_errors: bool,
    },
    /// Visualises the parsed file's Tree-sitter tree
    Visualise {
        /// Choose the type of visualisation Topiary should output
        output_format: Visualisation,
    },
}
183
184/// The function that takes an input and formats, or visualises an output.
185///
186/// # Errors
187///
188/// If formatting fails for any reason, a `FormatterError` will be returned.
189///
190/// # Examples
191///
192/// ```
193/// # tokio_test::block_on(async {
194/// use std::fs::File;
195/// use std::io::{BufReader, Read};
196/// use topiary_core::{formatter, Language, FormatterError, TopiaryQuery, Operation};
197///
198/// let input = "[1,2]".to_string();
199/// let mut input = input.as_bytes();
200/// let mut output = Vec::new();
201/// let json = topiary_tree_sitter_facade::Language::from(tree_sitter_json::LANGUAGE);
202///
203/// let mut query_file = BufReader::new(File::open("../topiary-queries/queries/json.scm").expect("query file"));
204/// let mut query_content = String::new();
205/// query_file.read_to_string(&mut query_content).expect("read query file");
206///
207/// let language: Language = Language {
208/// name: "json".to_owned(),
209/// query: TopiaryQuery::new(&json.clone().into(), &query_content).unwrap(),
210/// grammar: json.into(),
211/// indent: None,
212/// };
213///
214/// match formatter(&mut input, &mut output, &language, Operation::Format{ skip_idempotence: false, tolerate_parsing_errors: false }) {
215/// Ok(()) => {
216/// let formatted = String::from_utf8(output).expect("valid utf-8");
217/// }
218/// Err(FormatterError::Query(message, _)) => {
219/// panic!("Error in query file: {message}");
220/// }
221/// Err(_) => {
222/// panic!("An error occurred");
223/// }
224/// }
225/// # }) // end tokio_test
226/// ```
227pub fn formatter(
228 input: &mut impl io::Read,
229 output: &mut impl io::Write,
230 language: &Language,
231 operation: Operation,
232) -> FormatterResult<()> {
233 let content = read_input(input).map_err(|e| {
234 FormatterError::Io(IoError::Filesystem(
235 "Failed to read input contents".into(),
236 e,
237 ))
238 })?;
239
240 match operation {
241 Operation::Format {
242 skip_idempotence,
243 tolerate_parsing_errors,
244 } => {
245 // All the work related to tree-sitter and the query is done here
246 log::info!("Apply Tree-sitter query");
247
248 let mut atoms = tree_sitter::apply_query(
249 &content,
250 &language.query,
251 &language.grammar,
252 tolerate_parsing_errors,
253 )?;
254
255 // Various post-processing of whitespace
256 atoms.post_process();
257
258 // Pretty-print atoms
259 log::info!("Pretty-print output");
260 let rendered = pretty::render(
261 &atoms[..],
262 // Default to " " if the language has no indentation specified
263 language.indent.as_ref().map_or(" ", |v| v.as_str()),
264 )?;
265
266 // Add a final line break if missing
267 let rendered = format!("{}\n", rendered.trim());
268
269 if !skip_idempotence {
270 idempotence_check(&rendered, language, tolerate_parsing_errors)?;
271 }
272
273 write!(output, "{rendered}")?;
274 }
275
276 Operation::Visualise { output_format } => {
277 let tree = tree_sitter::parse(&content, &language.grammar, false)?;
278 let root: SyntaxNode = tree.root_node().into();
279
280 match output_format {
281 Visualisation::GraphViz => graphviz::write(output, &root)?,
282 Visualisation::Json => serde_json::to_writer(output, &root)?,
283 };
284 }
285 };
286
287 Ok(())
288}
289
290pub fn coverage(
291 input: &mut impl io::Read,
292 output: &mut impl io::Write,
293 language: &Language,
294) -> FormatterResult<()> {
295 let content = read_input(input).map_err(|e| {
296 FormatterError::Io(IoError::Filesystem(
297 "Failed to read input contents".into(),
298 e,
299 ))
300 })?;
301
302 let res = tree_sitter::check_query_coverage(&content, &language.query, &language.grammar)?;
303
304 let queries_string = if res.missing_patterns.is_empty() {
305 if res.cover_percentage == 0.0 {
306 "No queries found".into()
307 } else {
308 "All queries are matched".into()
309 }
310 } else {
311 format!(
312 "Unmatched queries:\n{}",
313 &res.missing_patterns[..].join("\n"),
314 )
315 };
316
317 write!(
318 output,
319 "Query coverage: {:.2}%\n{}\n",
320 res.cover_percentage * 100.0,
321 queries_string,
322 )?;
323
324 if res.cover_percentage == 1.0 {
325 Ok(())
326 } else {
327 Err(FormatterError::PatternDoesNotMatch)
328 }
329}
330
/// Drains the given reader and returns everything it yielded as a `String`.
///
/// Any error reported by the reader (including input that is not valid
/// UTF-8) is returned unchanged.
fn read_input(input: &mut dyn io::Read) -> Result<String, io::Error> {
    let mut buffer = String::new();
    input.read_to_string(&mut buffer).map(|_| buffer)
}
337
338/// Perform the idempotence check. Given the already formatted content of the
339/// file, formats the content again and checks if the two are identical.
340/// Result in: `Ok(())`` if the idempotence check succeeded (the content is
341/// identical to the formatted content)
342///
343/// # Errors
344///
345/// `Err(FormatterError::Idempotence)` if the idempotence check failed
346/// `Err(FormatterError::Formatting(...))` if the formatting failed
347fn idempotence_check(
348 content: &str,
349 language: &Language,
350 tolerate_parsing_errors: bool,
351) -> FormatterResult<()> {
352 log::info!("Checking for idempotence ...");
353
354 let mut input = content.as_bytes();
355 let mut output = io::BufWriter::new(Vec::new());
356
357 match formatter(
358 &mut input,
359 &mut output,
360 language,
361 Operation::Format {
362 skip_idempotence: true,
363 tolerate_parsing_errors,
364 },
365 ) {
366 Ok(()) => {
367 let reformatted = String::from_utf8(output.into_inner()?)?;
368
369 if content == reformatted {
370 Ok(())
371 } else {
372 log::error!("Failed idempotence check");
373 log::error!("{}", StrComparison::new(content, &reformatted));
374 Err(FormatterError::Idempotence)
375 }
376 }
377 Err(error @ FormatterError::Parsing { .. }) => {
378 Err(FormatterError::IdempotenceParsing(Box::new(error)))
379 }
380 Err(error) => Err(error),
381 }
382}
383
#[cfg(test)]
mod tests {
    use std::fs;

    use test_log::test;

    use crate::{
        error::FormatterError, formatter, test_utils::pretty_assert_eq, Language, Operation,
        TopiaryQuery,
    };

    /// Formatting invalid JSON without error tolerance must fail with a
    /// parsing error located on line 1.
    #[test(tokio::test)]
    async fn parsing_error_fails_formatting() {
        let grammar = topiary_tree_sitter_facade::Language::from(tree_sitter_json::LANGUAGE);
        let language = Language {
            name: "json".to_owned(),
            query: TopiaryQuery::new(&grammar, "(#language! json)").unwrap(),
            grammar,
            indent: None,
        };

        let mut malformed = r#"{"foo":{"bar"}}"#.as_bytes();
        let mut sink = Vec::new();

        let result = formatter(
            &mut malformed,
            &mut sink,
            &language,
            Operation::Format {
                skip_idempotence: true,
                tolerate_parsing_errors: false,
            },
        );

        let is_line_one_parse_error = matches!(
            result,
            Err(FormatterError::Parsing {
                start_line: 1,
                end_line: 1,
                ..
            })
        );

        if !is_line_one_parse_error {
            panic!("Expected a parsing error on line 1, but got {result:?}");
        }
    }

    /// With error tolerance enabled, the invalid part of the input is kept
    /// verbatim while the rest is formatted.
    #[test(tokio::test)]
    async fn tolerate_parsing_errors() {
        let query_source = fs::read_to_string("../topiary-queries/queries/json.scm").unwrap();
        let grammar = tree_sitter_json::LANGUAGE.into();
        let language = Language {
            name: "json".to_owned(),
            query: TopiaryQuery::new(&grammar, &query_source).unwrap(),
            grammar,
            indent: None,
        };

        // The object {"bar" "baz"} is invalid. It should be left untouched.
        let mut source = "{\"one\":{\"bar\" \"baz\"},\"two\":\"bar\"}".as_bytes();
        let expected = "{ \"one\": {\"bar\" \"baz\"}, \"two\": \"bar\" }\n";
        let mut sink = Vec::new();

        let operation = Operation::Format {
            skip_idempotence: true,
            tolerate_parsing_errors: true,
        };
        formatter(&mut source, &mut sink, &language, operation).unwrap();

        let formatted = String::from_utf8(sink).unwrap();
        log::debug!("{formatted}");

        pretty_assert_eq(expected, &formatted);
    }
}