Skip to main content

tree_sitter_cli/
parse.rs

1use std::{
2    fmt, fs,
3    io::{self, Write},
4    ops::ControlFlow,
5    path::{Path, PathBuf},
6    sync::atomic::{AtomicUsize, Ordering},
7    time::{Duration, Instant},
8};
9
10use anstyle::{AnsiColor, Color, RgbColor};
11use anyhow::{anyhow, Context, Result};
12use clap::ValueEnum;
13use log::info;
14use schemars::JsonSchema;
15use serde::{Deserialize, Serialize};
16use tree_sitter::{
17    ffi, InputEdit, Language, LogType, ParseOptions, ParseState, Parser, Point, Range, Tree,
18    TreeCursor,
19};
20
21use crate::{fuzz::edits::Edit, logger::paint, util};
22
/// Aggregate counters describing a batch of parses.
///
/// The `fmt::Display` implementation for this type renders the counters as a
/// one-line, human-readable summary.
#[derive(Debug, Default, Serialize, JsonSchema)]
pub struct Stats {
    /// Number of parses counted as successful.
    pub successful_parses: usize,
    /// Total number of parses counted.
    pub total_parses: usize,
    /// Total number of bytes parsed; numerator of the reported average speed.
    pub total_bytes: usize,
    /// Total time spent parsing; denominator of the reported average speed.
    pub total_duration: Duration,
}
30
31impl fmt::Display for Stats {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        let duration_us = self.total_duration.as_micros();
34        let success_rate = if self.total_parses > 0 {
35            format!(
36                "{:.2}%",
37                ((self.successful_parses as f64) / (self.total_parses as f64)) * 100.0,
38            )
39        } else {
40            "N/A".to_string()
41        };
42        let duration_str = match (self.total_parses, duration_us) {
43            (0, _) => "N/A".to_string(),
44            (_, 0) => "0 bytes/ms".to_string(),
45            (_, _) => format!(
46                "{} bytes/ms",
47                ((self.total_bytes as u128) * 1_000) / duration_us
48            ),
49        };
50        writeln!(
51            f,
52            "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {success_rate}; average speed: {duration_str}",
53            self.total_parses,
54            self.successful_parses,
55            self.total_parses - self.successful_parses,
56        )
57    }
58}
59
/// Sets the color used in the output of `tree-sitter parse --cst`
///
/// Every entry is optional. `ParseTheme::empty()` yields a theme with no
/// colors set, while the `Default` implementation supplies the built-in
/// palette.
#[derive(Debug, Copy, Clone)]
pub struct ParseTheme {
    /// The color of node kinds
    pub node_kind: Option<Color>,
    /// The color of text associated with a node
    pub node_text: Option<Color>,
    /// The color of node fields
    pub field: Option<Color>,
    /// The color of the range information for unnamed nodes
    pub row_color: Option<Color>,
    /// The color of the range information for named nodes
    pub row_color_named: Option<Color>,
    /// The color of extra nodes
    pub extra: Option<Color>,
    /// The color of ERROR nodes
    pub error: Option<Color>,
    /// The color of MISSING nodes and their associated text
    pub missing: Option<Color>,
    /// The color of newline characters
    pub line_feed: Option<Color>,
    /// The color of backticks
    pub backtick: Option<Color>,
    /// The color of literals
    pub literal: Option<Color>,
}
86
impl ParseTheme {
    // Fixed RGB palette entries used by the `Default` theme.
    const GRAY: Color = Color::Rgb(RgbColor(118, 118, 118));
    const LIGHT_GRAY: Color = Color::Rgb(RgbColor(166, 172, 181));
    const ORANGE: Color = Color::Rgb(RgbColor(255, 153, 51));
    const YELLOW: Color = Color::Rgb(RgbColor(219, 219, 173));
    const GREEN: Color = Color::Rgb(RgbColor(101, 192, 67));

    /// Returns a theme in which every color is `None`.
    #[must_use]
    pub const fn empty() -> Self {
        Self {
            node_kind: None,
            node_text: None,
            field: None,
            row_color: None,
            row_color_named: None,
            extra: None,
            error: None,
            missing: None,
            line_feed: None,
            backtick: None,
            literal: None,
        }
    }
}
111
112impl Default for ParseTheme {
113    fn default() -> Self {
114        Self {
115            node_kind: Some(AnsiColor::BrightCyan.into()),
116            node_text: Some(Self::GRAY),
117            field: Some(AnsiColor::Blue.into()),
118            row_color: Some(AnsiColor::White.into()),
119            row_color_named: Some(AnsiColor::BrightCyan.into()),
120            extra: Some(AnsiColor::BrightMagenta.into()),
121            error: Some(AnsiColor::Red.into()),
122            missing: Some(Self::ORANGE),
123            line_feed: Some(Self::LIGHT_GRAY),
124            backtick: Some(Self::GREEN),
125            literal: Some(Self::YELLOW),
126        }
127    }
128}
129
/// A (de)serializable triple of three `u8` color components; converted to
/// `RgbColor` component-for-component by the `From<Rgb>` implementation.
#[derive(Debug, Copy, Clone, Deserialize, Serialize)]
pub struct Rgb(pub u8, pub u8, pub u8);
132
133impl From<Rgb> for RgbColor {
134    fn from(val: Rgb) -> Self {
135        Self(val.0, val.1, val.2)
136    }
137}
138
/// Serializable configuration holding the optional parse theme.
///
/// Field names are (de)serialized in kebab-case, so `parse_theme` appears as
/// `parse-theme`.
#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct Config {
    /// Optional color overrides for the parse theme.
    pub parse_theme: Option<ParseThemeRaw>,
}
144
/// Serializable counterpart of `ParseTheme` in which each entry is an
/// optional `Rgb` triple.
///
/// Field names are (de)serialized in kebab-case. When converted into a
/// `ParseTheme`, entries left as `None` fall back to the default theme's
/// value for that entry.
#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct ParseThemeRaw {
    /// Override for `ParseTheme::node_kind`.
    pub node_kind: Option<Rgb>,
    /// Override for `ParseTheme::node_text`.
    pub node_text: Option<Rgb>,
    /// Override for `ParseTheme::field`.
    pub field: Option<Rgb>,
    /// Override for `ParseTheme::row_color`.
    pub row_color: Option<Rgb>,
    /// Override for `ParseTheme::row_color_named`.
    pub row_color_named: Option<Rgb>,
    /// Override for `ParseTheme::extra`.
    pub extra: Option<Rgb>,
    /// Override for `ParseTheme::error`.
    pub error: Option<Rgb>,
    /// Override for `ParseTheme::missing`.
    pub missing: Option<Rgb>,
    /// Override for `ParseTheme::line_feed`.
    pub line_feed: Option<Rgb>,
    /// Override for `ParseTheme::backtick`.
    pub backtick: Option<Rgb>,
    /// Override for `ParseTheme::literal`.
    pub literal: Option<Rgb>,
}
160
161impl From<ParseThemeRaw> for ParseTheme {
162    fn from(value: ParseThemeRaw) -> Self {
163        let val_or_default = |val: Option<Rgb>, default: Option<Color>| -> Option<Color> {
164            val.map_or(default, |v| Some(Color::Rgb(v.into())))
165        };
166        let default = Self::default();
167
168        Self {
169            node_kind: val_or_default(value.node_kind, default.node_kind),
170            node_text: val_or_default(value.node_text, default.node_text),
171            field: val_or_default(value.field, default.field),
172            row_color: val_or_default(value.row_color, default.row_color),
173            row_color_named: val_or_default(value.row_color_named, default.row_color_named),
174            extra: val_or_default(value.extra, default.extra),
175            error: val_or_default(value.error, default.error),
176            missing: val_or_default(value.missing, default.missing),
177            line_feed: val_or_default(value.line_feed, default.line_feed),
178            backtick: val_or_default(value.backtick, default.backtick),
179            literal: val_or_default(value.literal, default.literal),
180        }
181    }
182}
183
/// Selects which rendering of the syntax tree `parse_file_at_path` prints.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum ParseOutput {
    /// Print the named nodes as an indented, parenthesized tree.
    Normal,
    /// Print no tree; `parse_file_at_path` has no rendering branch for this variant.
    Quiet,
    /// Print the tree as XML elements wrapped in `<source>` / `<sources>`.
    Xml,
    /// Print the tree via `render_cst`.
    Cst,
    /// Hand the tree to `util::print_tree_graph`.
    Dot,
}
192
/// A position in a multi-line text document, in terms of rows and columns.
///
/// Rows and columns are zero-based.
///
/// This serves as a serializable wrapper for `Point`
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub struct ParsePoint {
    /// Zero-based row.
    pub row: usize,
    /// Zero-based column.
    pub column: usize,
}
203
204impl From<Point> for ParsePoint {
205    fn from(value: Point) -> Self {
206        Self {
207            row: value.row,
208            column: value.column,
209        }
210    }
211}
212
/// Per-file record of a parse, as pushed onto `ParseStats::parse_summaries`
/// by `parse_file_at_path`.
#[derive(Serialize, Default, Debug, Clone)]
pub struct ParseSummary {
    /// Path of the parsed file.
    pub file: PathBuf,
    /// `true` when no ERROR or MISSING node was found in the tree.
    pub successful: bool,
    /// Start position of the root node; `None` when no tree was produced.
    pub start: Option<ParsePoint>,
    /// End position of the root node; `None` when no tree was produced.
    pub end: Option<ParsePoint>,
    /// Time taken by the initial parse; `None` when no tree was produced.
    pub duration: Option<Duration>,
    /// Length of the source in bytes.
    pub bytes: Option<usize>,
}
222
223impl ParseSummary {
224    #[must_use]
225    pub fn new(path: &Path) -> Self {
226        Self {
227            file: path.to_path_buf(),
228            successful: false,
229            ..Default::default()
230        }
231    }
232}
233
/// Statistics collected across all files handled in one run.
#[derive(Serialize, Debug)]
pub struct ParseStats {
    /// One entry per file processed by `parse_file_at_path`, in processing order.
    pub parse_summaries: Vec<ParseSummary>,
    /// Aggregate counters over all parses.
    pub cumulative_stats: Stats,
    /// Number of sources expected in this run; the XML output uses it to
    /// decide when to emit the closing `</sources>` tag.
    pub source_count: usize,
}
240
241impl Default for ParseStats {
242    fn default() -> Self {
243        Self {
244            parse_summaries: Vec::new(),
245            cumulative_stats: Stats::default(),
246            source_count: 1,
247        }
248    }
249}
250
/// Controls how parser log messages are reported by `parse_file_at_path`.
#[derive(Serialize, ValueEnum, Debug, Copy, Clone, Default, Eq, PartialEq)]
pub enum ParseDebugType {
    /// No stderr logger is installed.
    #[default]
    Quiet,
    /// Log messages are written to stderr verbatim, with lexer messages
    /// indented by two spaces.
    Normal,
    /// Like `Normal`, but each message is colored according to the most
    /// recent `process version:` message, unless `NO_COLOR` is set to `1`.
    Pretty,
}
258
/// Per-invocation settings consumed by `parse_file_at_path`.
pub struct ParseFileOptions<'a> {
    /// Edit descriptions applied in order after the initial parse; each one is
    /// decoded by `parse_edit_flag` and followed by a re-parse.
    pub edits: &'a [&'a str],
    /// Which rendering of the syntax tree to print.
    pub output: ParseOutput,
    /// Accumulator that receives one `ParseSummary` per processed file.
    pub stats: &'a mut ParseStats,
    /// Print the timing line even when the tree contains no error.
    pub print_time: bool,
    /// Parse timeout in microseconds; `0` disables the timeout.
    pub timeout: u64,
    /// How parser log messages are written to stderr. Not consulted when
    /// `debug_graph` is set.
    pub debug: ParseDebugType,
    /// Record parser debug graphs to `log.html` instead of logging to stderr.
    pub debug_graph: bool,
    /// When present, a non-zero value cancels the in-progress parse.
    pub cancellation_flag: Option<&'a AtomicUsize>,
    /// Explicit input encoding, compared against the `ffi::TSInputEncoding*`
    /// constants. When `None`, a UTF-16 byte-order mark at the start of the
    /// file selects UTF-16 LE/BE; otherwise the default parse path is used.
    pub encoding: Option<u32>,
    /// Forwarded to `util::log_graphs` and `util::print_tree_graph`.
    /// NOTE(review): presumably controls whether the log is opened — confirm in `util`.
    pub open_log: bool,
    /// Omit node position ranges from the printed tree.
    pub no_ranges: bool,
    /// Colors used when rendering the CST output.
    pub parse_theme: &'a ParseTheme,
}
273
/// Outcome of a single parse.
///
/// NOTE(review): this type is not referenced in the visible part of this
/// file; the field descriptions below are inferred from the names only.
#[derive(Copy, Clone)]
pub struct ParseResult {
    /// Presumably whether the parse completed without errors — confirm.
    pub successful: bool,
    /// Presumably the number of source bytes parsed — confirm.
    pub bytes: usize,
    /// Presumably the parse duration, when one was measured — confirm.
    pub duration: Option<Duration>,
}
280
281pub fn parse_file_at_path(
282    parser: &mut Parser,
283    language: &Language,
284    path: &Path,
285    name: &str,
286    max_path_length: usize,
287    opts: &mut ParseFileOptions,
288) -> Result<()> {
289    let mut _log_session = None;
290    parser.set_language(language)?;
291    let mut source_code = fs::read(path).with_context(|| format!("Error reading {name:?}"))?;
292
293    // Render an HTML graph if `--debug-graph` was passed
294    if opts.debug_graph {
295        _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?);
296    }
297    // Log to stderr if `--debug` was passed
298    else if opts.debug != ParseDebugType::Quiet {
299        let mut curr_version: usize = 0;
300        let use_color = std::env::var("NO_COLOR").map_or(true, |v| v != "1");
301        let debug = opts.debug;
302        parser.set_logger(Some(Box::new(move |log_type, message| {
303            if debug == ParseDebugType::Normal {
304                if log_type == LogType::Lex {
305                    write!(&mut io::stderr(), "  ").unwrap();
306                }
307                writeln!(&mut io::stderr(), "{message}").unwrap();
308            } else {
309                #[rustfmt::skip]
310                let colors = &[
311                    AnsiColor::White, AnsiColor::Red, AnsiColor::Blue, AnsiColor::Green,
312                    AnsiColor::Cyan, AnsiColor::Yellow, AnsiColor::Magenta,
313                    AnsiColor::BrightWhite, AnsiColor::BrightRed, AnsiColor::BrightBlue,
314                    AnsiColor::BrightGreen, AnsiColor::BrightCyan, AnsiColor::BrightYellow,
315                    AnsiColor::BrightMagenta,
316                ];
317                if message.starts_with("process version:") {
318                    let comma_idx = message.find(',').unwrap();
319                    curr_version = message["process version:".len()..comma_idx]
320                        .parse()
321                        .unwrap();
322                }
323                let color = if use_color {
324                    Some(colors[curr_version % colors.len()])
325                } else {
326                    None
327                };
328                let mut out = if log_type == LogType::Lex {
329                    "  ".to_string()
330                } else {
331                    String::new()
332                };
333                out += &paint(color, message);
334                writeln!(&mut io::stderr(), "{out}").unwrap();
335            }
336        })));
337    }
338
339    let parse_time = Instant::now();
340
341    #[inline(always)]
342    fn is_utf16_le_bom(bom_bytes: &[u8]) -> bool {
343        bom_bytes == [0xFF, 0xFE]
344    }
345
346    #[inline(always)]
347    fn is_utf16_be_bom(bom_bytes: &[u8]) -> bool {
348        bom_bytes == [0xFE, 0xFF]
349    }
350
351    let encoding = match opts.encoding {
352        None if source_code.len() >= 2 => {
353            if is_utf16_le_bom(&source_code[0..2]) {
354                Some(ffi::TSInputEncodingUTF16LE)
355            } else if is_utf16_be_bom(&source_code[0..2]) {
356                Some(ffi::TSInputEncodingUTF16BE)
357            } else {
358                None
359            }
360        }
361        _ => opts.encoding,
362    };
363
364    // If the `--cancel` flag was passed, then cancel the parse
365    // when the user types a newline.
366    //
367    // Additionally, if the `--time` flag was passed, end the parse
368    // after the specified number of microseconds.
369    let start_time = Instant::now();
370    let progress_callback = &mut |_: &ParseState| {
371        if let Some(cancellation_flag) = opts.cancellation_flag {
372            if cancellation_flag.load(Ordering::SeqCst) != 0 {
373                return ControlFlow::Break(());
374            }
375        }
376
377        if opts.timeout > 0 && start_time.elapsed().as_micros() > opts.timeout as u128 {
378            return ControlFlow::Break(());
379        }
380
381        ControlFlow::Continue(())
382    };
383
384    let parse_opts = ParseOptions::new().progress_callback(progress_callback);
385
386    let tree = match encoding {
387        Some(encoding) if encoding == ffi::TSInputEncodingUTF16LE => {
388            let source_code_utf16 = source_code
389                .chunks_exact(2)
390                .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
391                .collect::<Vec<_>>();
392            parser.parse_utf16_le_with_options(
393                &mut |i, _| {
394                    if i < source_code_utf16.len() {
395                        &source_code_utf16[i..]
396                    } else {
397                        &[]
398                    }
399                },
400                None,
401                Some(parse_opts),
402            )
403        }
404        Some(encoding) if encoding == ffi::TSInputEncodingUTF16BE => {
405            let source_code_utf16 = source_code
406                .chunks_exact(2)
407                .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]]))
408                .collect::<Vec<_>>();
409            parser.parse_utf16_be_with_options(
410                &mut |i, _| {
411                    if i < source_code_utf16.len() {
412                        &source_code_utf16[i..]
413                    } else {
414                        &[]
415                    }
416                },
417                None,
418                Some(parse_opts),
419            )
420        }
421        _ => parser.parse_with_options(
422            &mut |i, _| {
423                if i < source_code.len() {
424                    &source_code[i..]
425                } else {
426                    &[]
427                }
428            },
429            None,
430            Some(parse_opts),
431        ),
432    };
433    let parse_duration = parse_time.elapsed();
434
435    let stdout = io::stdout();
436    let mut stdout = io::BufWriter::with_capacity(64 * 1024, stdout.lock());
437
438    if let Some(mut tree) = tree {
439        if opts.debug_graph && !opts.edits.is_empty() {
440            info!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
441        }
442
443        let edit_time = Instant::now();
444        for (i, edit) in opts.edits.iter().enumerate() {
445            let edit = parse_edit_flag(&source_code, edit)?;
446            perform_edit(&mut tree, &mut source_code, &edit)?;
447            tree = parser.parse(&source_code, Some(&tree)).unwrap();
448
449            if opts.debug_graph {
450                info!("AFTER {i}:\n{}", String::from_utf8_lossy(&source_code));
451            }
452        }
453        let edit_duration = edit_time.elapsed();
454
455        parser.stop_printing_dot_graphs();
456
457        let parse_duration_ms = parse_duration.as_micros() as f64 / 1e3;
458        let edit_duration_ms = edit_duration.as_micros() as f64 / 1e3;
459        let mut cursor = tree.walk();
460
461        if opts.output == ParseOutput::Normal {
462            let mut needs_newline = false;
463            let mut indent_level = 0;
464            let mut did_visit_children = false;
465            loop {
466                let node = cursor.node();
467                let is_named = node.is_named();
468                if did_visit_children {
469                    if is_named {
470                        stdout.write_all(b")")?;
471                        needs_newline = true;
472                    }
473                    if cursor.goto_next_sibling() {
474                        did_visit_children = false;
475                    } else if cursor.goto_parent() {
476                        did_visit_children = true;
477                        indent_level -= 1;
478                    } else {
479                        break;
480                    }
481                } else {
482                    if is_named {
483                        if needs_newline {
484                            stdout.write_all(b"\n")?;
485                        }
486                        for _ in 0..indent_level {
487                            stdout.write_all(b"  ")?;
488                        }
489                        let start = node.start_position();
490                        let end = node.end_position();
491                        if let Some(field_name) = cursor.field_name() {
492                            write!(&mut stdout, "{field_name}: ")?;
493                        }
494                        write!(&mut stdout, "({}", node.kind())?;
495                        if !opts.no_ranges {
496                            write!(
497                                &mut stdout,
498                                " [{}, {}] - [{}, {}]",
499                                start.row, start.column, end.row, end.column
500                            )?;
501                        }
502                        needs_newline = true;
503                    }
504                    if cursor.goto_first_child() {
505                        did_visit_children = false;
506                        indent_level += 1;
507                    } else {
508                        did_visit_children = true;
509                    }
510                }
511            }
512            cursor.reset(tree.root_node());
513            writeln!(&mut stdout)?;
514        }
515
516        if opts.output == ParseOutput::Cst {
517            render_cst(&source_code, &tree, &mut cursor, opts, &mut stdout)?;
518        }
519
520        if opts.output == ParseOutput::Xml {
521            let mut needs_newline = false;
522            let mut indent_level = 2;
523            let mut did_visit_children = false;
524            let mut had_named_children = false;
525            let mut tags = Vec::<&str>::new();
526
527            // If we're parsing the first file, write the header
528            if opts.stats.parse_summaries.is_empty() {
529                writeln!(&mut stdout, "<?xml version=\"1.0\"?>")?;
530                writeln!(&mut stdout, "<sources>")?;
531            }
532            writeln!(&mut stdout, "  <source name=\"{}\">", path.display())?;
533
534            loop {
535                let node = cursor.node();
536                let is_named = node.is_named();
537                if did_visit_children {
538                    if is_named {
539                        let tag = tags.pop();
540                        if had_named_children {
541                            for _ in 0..indent_level {
542                                stdout.write_all(b"  ")?;
543                            }
544                        }
545                        write!(&mut stdout, "</{}>", tag.expect("there is a tag"))?;
546                        // we only write a line in the case where it's the last sibling
547                        if let Some(parent) = node.parent() {
548                            if parent.child(parent.child_count() as u32 - 1).unwrap() == node {
549                                stdout.write_all(b"\n")?;
550                            }
551                        }
552                        needs_newline = true;
553                    }
554                    if cursor.goto_next_sibling() {
555                        did_visit_children = false;
556                        had_named_children = false;
557                    } else if cursor.goto_parent() {
558                        did_visit_children = true;
559                        had_named_children = is_named;
560                        indent_level -= 1;
561                        if !is_named && needs_newline {
562                            stdout.write_all(b"\n")?;
563                            for _ in 0..indent_level {
564                                stdout.write_all(b"  ")?;
565                            }
566                        }
567                    } else {
568                        break;
569                    }
570                } else {
571                    if is_named {
572                        if needs_newline {
573                            stdout.write_all(b"\n")?;
574                        }
575                        for _ in 0..indent_level {
576                            stdout.write_all(b"  ")?;
577                        }
578                        write!(&mut stdout, "<{}", node.kind())?;
579                        if let Some(field_name) = cursor.field_name() {
580                            write!(&mut stdout, " field=\"{field_name}\"")?;
581                        }
582                        let start = node.start_position();
583                        let end = node.end_position();
584                        write!(
585                            &mut stdout,
586                            " srow=\"{}\" scol=\"{}\" erow=\"{}\" ecol=\"{}\">",
587                            start.row, start.column, end.row, end.column
588                        )?;
589                        tags.push(node.kind());
590                        needs_newline = true;
591                    }
592                    if cursor.goto_first_child() {
593                        did_visit_children = false;
594                        had_named_children = false;
595                        indent_level += 1;
596                    } else {
597                        did_visit_children = true;
598                        let start = node.start_byte();
599                        let end = node.end_byte();
600                        let value =
601                            std::str::from_utf8(&source_code[start..end]).expect("has a string");
602                        if !is_named && needs_newline {
603                            stdout.write_all(b"\n")?;
604                            for _ in 0..indent_level {
605                                stdout.write_all(b"  ")?;
606                            }
607                        }
608                        write!(&mut stdout, "{}", html_escape::encode_text(value))?;
609                    }
610                }
611            }
612            writeln!(&mut stdout)?;
613            writeln!(&mut stdout, "  </source>")?;
614
615            // If we parsed the last file, write the closing tag for the `sources` header
616            if opts.stats.parse_summaries.len() == opts.stats.source_count - 1 {
617                writeln!(&mut stdout, "</sources>")?;
618            }
619            cursor.reset(tree.root_node());
620        }
621
622        if opts.output == ParseOutput::Dot {
623            util::print_tree_graph(&tree, "log.html", opts.open_log).unwrap();
624        }
625
626        let mut first_error = None;
627        let mut earliest_node_with_error = None;
628        'outer: loop {
629            let node = cursor.node();
630            if node.has_error() {
631                if earliest_node_with_error.is_none() {
632                    earliest_node_with_error = Some(node);
633                }
634                if node.is_error() || node.is_missing() {
635                    first_error = Some(node);
636                    break;
637                }
638
639                // If there's no more children, even though some outer node has an error,
640                // then that means that the first error is hidden, but the later error could be
641                // visible. So, we walk back up to the child of the first node with an error,
642                // and then check its siblings for errors.
643                if !cursor.goto_first_child() {
644                    let earliest = earliest_node_with_error.unwrap();
645                    while cursor.goto_parent() {
646                        if cursor.node().parent().is_some_and(|p| p == earliest) {
647                            while cursor.goto_next_sibling() {
648                                let sibling = cursor.node();
649                                if sibling.is_error() || sibling.is_missing() {
650                                    first_error = Some(sibling);
651                                    break 'outer;
652                                }
653                                if sibling.has_error() && cursor.goto_first_child() {
654                                    continue 'outer;
655                                }
656                            }
657                            break;
658                        }
659                    }
660                    break;
661                }
662            } else if !cursor.goto_next_sibling() {
663                break;
664            }
665        }
666
667        if first_error.is_some() || opts.print_time {
668            let path = path.to_string_lossy();
669            write!(
670                &mut stdout,
671                "{:width$}\tParse: {parse_duration_ms:>7.2} ms\t{:>6} bytes/ms",
672                name,
673                (source_code.len() as u128 * 1_000_000) / parse_duration.as_nanos(),
674                width = max_path_length
675            )?;
676            if let Some(node) = first_error {
677                let node_kind = node.kind();
678                let mut node_text = String::with_capacity(node_kind.len());
679                for c in node_kind.chars() {
680                    if let Some(escaped) = escape_invisible(c) {
681                        node_text += escaped;
682                    } else {
683                        node_text.push(c);
684                    }
685                }
686                write!(&mut stdout, "\t(")?;
687                if node.is_missing() {
688                    if node.is_named() {
689                        write!(&mut stdout, "MISSING {node_text}")?;
690                    } else {
691                        write!(&mut stdout, "MISSING \"{node_text}\"")?;
692                    }
693                } else {
694                    write!(&mut stdout, "{node_text}")?;
695                }
696
697                let start = node.start_position();
698                let end = node.end_position();
699                write!(
700                    &mut stdout,
701                    " [{}, {}] - [{}, {}])",
702                    start.row, start.column, end.row, end.column
703                )?;
704            }
705            if !opts.edits.is_empty() {
706                write!(
707                    &mut stdout,
708                    "\n{:width$}\tEdit:  {edit_duration_ms:>7.2} ms",
709                    " ".repeat(path.len()),
710                    width = max_path_length,
711                )?;
712            }
713            writeln!(&mut stdout)?;
714        }
715
716        opts.stats.parse_summaries.push(ParseSummary {
717            file: path.to_path_buf(),
718            successful: first_error.is_none(),
719            start: Some(tree.root_node().start_position().into()),
720            end: Some(tree.root_node().end_position().into()),
721            duration: Some(parse_duration),
722            bytes: Some(source_code.len()),
723        });
724
725        return Ok(());
726    }
727    parser.stop_printing_dot_graphs();
728
729    if opts.print_time {
730        let duration = parse_time.elapsed();
731        let duration_ms = duration.as_micros() as f64 / 1e3;
732        writeln!(
733            &mut stdout,
734            "{:width$}\tParse: {duration_ms:>7.2} ms\t(timed out)",
735            path.to_str().unwrap(),
736            width = max_path_length
737        )?;
738    }
739
740    opts.stats.parse_summaries.push(ParseSummary {
741        file: path.to_path_buf(),
742        successful: false,
743        start: None,
744        end: None,
745        duration: None,
746        bytes: Some(source_code.len()),
747    });
748
749    Ok(())
750}
751
/// Returns the backslash escape sequence for `c` when it is one of the
/// control characters handled here (or a backslash itself), and `None` for
/// every other character.
const fn escape_invisible(c: char) -> Option<&'static str> {
    match c {
        '\n' => Some("\\n"),
        '\r' => Some("\\r"),
        '\t' => Some("\\t"),
        '\0' => Some("\\0"),
        '\\' => Some("\\\\"),
        '\x0b' => Some("\\v"),
        '\x0c' => Some("\\f"),
        _ => None,
    }
}
764
/// Returns the backslash-escaped form of `c` when it is a backtick or a
/// double quote, and `None` for every other character.
const fn escape_delimiter(c: char) -> Option<&'static str> {
    match c {
        '`' => Some("\\`"),
        '"' => Some("\\\""),
        _ => None,
    }
}
772
773pub fn render_cst<'a, 'b: 'a>(
774    source_code: &[u8],
775    tree: &'b Tree,
776    cursor: &mut TreeCursor<'a>,
777    opts: &ParseFileOptions,
778    out: &mut impl Write,
779) -> Result<()> {
780    let lossy_source_code = String::from_utf8_lossy(source_code);
781    let total_width = lossy_source_code
782        .lines()
783        .enumerate()
784        .map(|(row, col)| {
785            row.checked_ilog10().unwrap_or(0) as usize
786                + col.len().checked_ilog10().unwrap_or(0) as usize
787                + 1
788        })
789        .max()
790        .unwrap_or(1);
791    let mut indent_level = usize::from(!opts.no_ranges);
792    let mut did_visit_children = false;
793    let mut in_error = false;
794    loop {
795        if did_visit_children {
796            if cursor.goto_next_sibling() {
797                did_visit_children = false;
798            } else if cursor.goto_parent() {
799                did_visit_children = true;
800                indent_level -= 1;
801                if !cursor.node().has_error() {
802                    in_error = false;
803                }
804            } else {
805                break;
806            }
807        } else {
808            cst_render_node(
809                opts,
810                cursor,
811                source_code,
812                out,
813                total_width,
814                indent_level,
815                in_error,
816            )?;
817            if cursor.goto_first_child() {
818                did_visit_children = false;
819                indent_level += 1;
820                if cursor.node().has_error() {
821                    in_error = true;
822                }
823            } else {
824                did_visit_children = true;
825            }
826        }
827    }
828    cursor.reset(tree.root_node());
829    Ok(())
830}
831
832fn render_node_text(source: &str) -> String {
833    source
834        .chars()
835        .fold(String::with_capacity(source.len()), |mut acc, c| {
836            if let Some(esc) = escape_invisible(c) {
837                acc.push_str(esc);
838            } else if let Some(esc) = escape_delimiter(c) {
839                acc.push_str(esc);
840            } else {
841                acc.push(c);
842            }
843            acc
844        })
845}
846
847fn write_node_text(
848    opts: &ParseFileOptions,
849    out: &mut impl Write,
850    cursor: &TreeCursor,
851    is_named: bool,
852    source: &str,
853    color: Option<impl Into<Color> + Copy>,
854    text_info: (usize, usize),
855) -> Result<()> {
856    let (total_width, indent_level) = text_info;
857    let (quote, quote_color) = if is_named {
858        ('`', opts.parse_theme.backtick)
859    } else {
860        ('\"', color.map(|c| c.into()))
861    };
862
863    if !is_named {
864        write!(
865            out,
866            "{}{}{}",
867            paint(quote_color, &String::from(quote)),
868            paint(color, &render_node_text(source)),
869            paint(quote_color, &String::from(quote)),
870        )?;
871    } else {
872        let multiline = source.contains('\n');
873        for (i, line) in source.split_inclusive('\n').enumerate() {
874            if line.is_empty() {
875                break;
876            }
877            let mut node_range = cursor.node().range();
878            // For each line of text, adjust the row by shifting it down `i` rows,
879            // and adjust the column by setting it to the length of *this* line.
880            node_range.start_point.row += i;
881            node_range.end_point.row = node_range.start_point.row;
882            node_range.end_point.column = line.len()
883                + if i == 0 {
884                    node_range.start_point.column
885                } else {
886                    0
887                };
888            let formatted_line = render_line_feed(line, opts);
889            write!(
890                out,
891                "{}{}{}{}{}{}",
892                if multiline { "\n" } else { " " },
893                if multiline && !opts.no_ranges {
894                    render_node_range(opts, cursor, is_named, true, total_width, node_range)
895                } else {
896                    String::new()
897                },
898                if multiline {
899                    "  ".repeat(indent_level + 1)
900                } else {
901                    String::new()
902                },
903                paint(quote_color, &String::from(quote)),
904                paint(color, &render_node_text(&formatted_line)),
905                paint(quote_color, &String::from(quote)),
906            )?;
907        }
908    }
909
910    Ok(())
911}
912
913fn render_line_feed(source: &str, opts: &ParseFileOptions) -> String {
914    if cfg!(windows) {
915        source.replace("\r\n", &paint(opts.parse_theme.line_feed, "\r\n"))
916    } else {
917        source.replace('\n', &paint(opts.parse_theme.line_feed, "\n"))
918    }
919}
920
921fn render_node_range(
922    opts: &ParseFileOptions,
923    cursor: &TreeCursor,
924    is_named: bool,
925    is_multiline: bool,
926    total_width: usize,
927    range: Range,
928) -> String {
929    let has_field_name = cursor.field_name().is_some();
930    let start = range.start_point;
931    let end = range.end_point;
932    let range_color = if is_named && !is_multiline && !has_field_name {
933        opts.parse_theme.row_color_named
934    } else {
935        opts.parse_theme.row_color
936    };
937
938    let remaining_width = |row: usize, col: usize| {
939        (total_width
940            .saturating_sub(row.checked_ilog10().unwrap_or(0) as usize)
941            .saturating_sub(col.checked_ilog10().unwrap_or(0) as usize))
942        .max(1)
943    };
944    let remaining_width_start = remaining_width(start.row, start.column);
945    let remaining_width_end = remaining_width(end.row, end.column);
946    paint(
947        range_color,
948        &format!(
949            "{}:{}{:remaining_width_start$}- {}:{}{:remaining_width_end$}",
950            start.row, start.column, ' ', end.row, end.column, ' ',
951        ),
952    )
953}
954
955fn cst_render_node(
956    opts: &ParseFileOptions,
957    cursor: &mut TreeCursor,
958    source_code: &[u8],
959    out: &mut impl Write,
960    total_width: usize,
961    indent_level: usize,
962    in_error: bool,
963) -> Result<()> {
964    let node = cursor.node();
965    let is_named = node.is_named();
966    if !opts.no_ranges {
967        write!(
968            out,
969            "{}",
970            render_node_range(opts, cursor, is_named, false, total_width, node.range())
971        )?;
972    }
973    write!(
974        out,
975        "{}{}",
976        "  ".repeat(indent_level),
977        if in_error && !node.has_error() {
978            " "
979        } else {
980            ""
981        }
982    )?;
983    if is_named {
984        if let Some(field_name) = cursor.field_name() {
985            write!(
986                out,
987                "{}",
988                paint(opts.parse_theme.field, &format!("{field_name}: "))
989            )?;
990        }
991
992        if node.has_error() || node.is_error() {
993            write!(out, "{}", paint(opts.parse_theme.error, "•"))?;
994        }
995
996        let kind_color = if node.is_error() {
997            opts.parse_theme.error
998        } else if node.is_extra() || node.parent().is_some_and(|p| p.is_extra() && !p.is_error()) {
999            opts.parse_theme.extra
1000        } else {
1001            opts.parse_theme.node_kind
1002        };
1003        write!(out, "{}", paint(kind_color, node.kind()))?;
1004
1005        if node.child_count() == 0 {
1006            // Node text from a pattern or external scanner
1007            write_node_text(
1008                opts,
1009                out,
1010                cursor,
1011                is_named,
1012                &String::from_utf8_lossy(&source_code[node.start_byte()..node.end_byte()]),
1013                opts.parse_theme.node_text,
1014                (total_width, indent_level),
1015            )?;
1016        }
1017    } else if node.is_missing() {
1018        write!(out, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?;
1019        write!(out, "\"{}\"", paint(opts.parse_theme.missing, node.kind()))?;
1020    } else {
1021        // Terminal literals, like "fn"
1022        write_node_text(
1023            opts,
1024            out,
1025            cursor,
1026            is_named,
1027            node.kind(),
1028            opts.parse_theme.literal,
1029            (total_width, indent_level),
1030        )?;
1031    }
1032    writeln!(out)?;
1033
1034    Ok(())
1035}
1036
1037pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
1038    let start_byte = edit.position;
1039    let old_end_byte = edit.position + edit.deleted_length;
1040    let new_end_byte = edit.position + edit.inserted_text.len();
1041    let start_position = position_for_offset(input, start_byte)?;
1042    let old_end_position = position_for_offset(input, old_end_byte)?;
1043    input.splice(start_byte..old_end_byte, edit.inserted_text.iter().copied());
1044    let new_end_position = position_for_offset(input, new_end_byte)?;
1045    let edit = InputEdit {
1046        start_byte,
1047        old_end_byte,
1048        new_end_byte,
1049        start_position,
1050        old_end_position,
1051        new_end_position,
1052    };
1053    tree.edit(&edit);
1054    Ok(edit)
1055}
1056
1057fn parse_edit_flag(source_code: &[u8], flag: &str) -> Result<Edit> {
1058    let error = || {
1059        anyhow!(concat!(
1060            "Invalid edit string '{}'. ",
1061            "Edit strings must match the pattern '<START_BYTE_OR_POSITION> <REMOVED_LENGTH> <NEW_TEXT>'"
1062        ), flag)
1063    };
1064
1065    // Three whitespace-separated parts:
1066    // * edit position
1067    // * deleted length
1068    // * inserted text
1069    let mut parts = flag.split(' ');
1070    let position = parts.next().ok_or_else(error)?;
1071    let deleted_length = parts.next().ok_or_else(error)?;
1072    let inserted_text = parts.collect::<Vec<_>>().join(" ").into_bytes();
1073
1074    // Position can either be a byte_offset or row,column pair, separated by a comma
1075    let position = if position == "$" {
1076        source_code.len()
1077    } else if position.contains(',') {
1078        let mut parts = position.split(',');
1079        let row = parts.next().ok_or_else(error)?;
1080        let row = row.parse::<usize>().map_err(|_| error())?;
1081        let column = parts.next().ok_or_else(error)?;
1082        let column = column.parse::<usize>().map_err(|_| error())?;
1083        offset_for_position(source_code, Point { row, column })?
1084    } else {
1085        position.parse::<usize>().map_err(|_| error())?
1086    };
1087
1088    // Deleted length must be a byte count.
1089    let deleted_length = deleted_length.parse::<usize>().map_err(|_| error())?;
1090
1091    Ok(Edit {
1092        position,
1093        deleted_length,
1094        inserted_text,
1095    })
1096}
1097
1098pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
1099    let mut row = 0;
1100    let mut offset = 0;
1101    let mut iter = memchr::memchr_iter(b'\n', input);
1102    loop {
1103        if let Some(pos) = iter.next() {
1104            if row < position.row {
1105                row += 1;
1106                offset = pos;
1107                continue;
1108            }
1109        }
1110        offset += 1;
1111        break;
1112    }
1113    if position.row - row > 0 {
1114        return Err(anyhow!("Failed to address a row: {}", position.row));
1115    }
1116    if let Some(pos) = iter.next() {
1117        if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
1118            return Err(anyhow!("Failed to address a column: {}", position.column));
1119        }
1120    } else if input.len() - offset < position.column {
1121        return Err(anyhow!("Failed to address a column over the end"));
1122    }
1123    Ok(offset + position.column)
1124}
1125
1126pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
1127    if offset > input.len() {
1128        return Err(anyhow!("Failed to address an offset: {offset}"));
1129    }
1130    let mut result = Point { row: 0, column: 0 };
1131    let mut last = 0;
1132    for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
1133        result.row += 1;
1134        last = pos;
1135    }
1136    result.column = if result.row > 0 {
1137        offset - last - 1
1138    } else {
1139        offset
1140    };
1141    Ok(result)
1142}