Skip to main content

tree_sitter_cli/
parse.rs

1use std::{
2    fmt, fs,
3    io::{self, Write},
4    ops::ControlFlow,
5    path::{Path, PathBuf},
6    sync::atomic::{AtomicUsize, Ordering},
7    time::{Duration, Instant},
8};
9
10use anstyle::{AnsiColor, Color, RgbColor};
11use anyhow::{anyhow, Context, Result};
12use clap::ValueEnum;
13use log::info;
14use schemars::JsonSchema;
15use serde::{Deserialize, Serialize};
16use tree_sitter::{
17    ffi, InputEdit, Language, LogType, ParseOptions, ParseState, Parser, Point, Range, Tree,
18    TreeCursor,
19};
20
21use crate::{fuzz::edits::Edit, logger::paint, util};
22
23#[derive(Debug, Default, Serialize, JsonSchema)]
24pub struct Stats {
25    pub successful_parses: usize,
26    pub total_parses: usize,
27    pub total_bytes: usize,
28    pub total_duration: Duration,
29}
30
31impl fmt::Display for Stats {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        let duration_us = self.total_duration.as_micros();
34        let success_rate = if self.total_parses > 0 {
35            format!(
36                "{:.2}%",
37                ((self.successful_parses as f64) / (self.total_parses as f64)) * 100.0,
38            )
39        } else {
40            "N/A".to_string()
41        };
42        let duration_str = match (self.total_parses, duration_us) {
43            (0, _) => "N/A".to_string(),
44            (_, 0) => "0 bytes/ms".to_string(),
45            (_, _) => format!(
46                "{} bytes/ms",
47                ((self.total_bytes as u128) * 1_000) / duration_us
48            ),
49        };
50        writeln!(
51            f,
52            "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {success_rate}; average speed: {duration_str}",
53            self.total_parses,
54            self.successful_parses,
55            self.total_parses - self.successful_parses,
56        )
57    }
58}
59
60/// Sets the color used in the output of `tree-sitter parse --cst`
61#[derive(Debug, Copy, Clone)]
62pub struct ParseTheme {
63    /// The color of node kinds
64    pub node_kind: Option<Color>,
65    /// The color of text associated with a node
66    pub node_text: Option<Color>,
67    /// The color of node fields
68    pub field: Option<Color>,
69    /// The color of the range information for unnamed nodes
70    pub row_color: Option<Color>,
71    /// The color of the range information for named nodes
72    pub row_color_named: Option<Color>,
73    /// The color of extra nodes
74    pub extra: Option<Color>,
75    /// The color of ERROR nodes
76    pub error: Option<Color>,
77    /// The color of MISSING nodes and their associated text
78    pub missing: Option<Color>,
79    /// The color of newline characters
80    pub line_feed: Option<Color>,
81    /// The color of backticks
82    pub backtick: Option<Color>,
83    /// The color of literals
84    pub literal: Option<Color>,
85}
86
87impl ParseTheme {
88    const GRAY: Color = Color::Rgb(RgbColor(118, 118, 118));
89    const LIGHT_GRAY: Color = Color::Rgb(RgbColor(166, 172, 181));
90    const ORANGE: Color = Color::Rgb(RgbColor(255, 153, 51));
91    const YELLOW: Color = Color::Rgb(RgbColor(219, 219, 173));
92    const GREEN: Color = Color::Rgb(RgbColor(101, 192, 67));
93
94    #[must_use]
95    pub const fn empty() -> Self {
96        Self {
97            node_kind: None,
98            node_text: None,
99            field: None,
100            row_color: None,
101            row_color_named: None,
102            extra: None,
103            error: None,
104            missing: None,
105            line_feed: None,
106            backtick: None,
107            literal: None,
108        }
109    }
110}
111
112impl Default for ParseTheme {
113    fn default() -> Self {
114        Self {
115            node_kind: Some(AnsiColor::BrightCyan.into()),
116            node_text: Some(Self::GRAY),
117            field: Some(AnsiColor::Blue.into()),
118            row_color: Some(AnsiColor::White.into()),
119            row_color_named: Some(AnsiColor::BrightCyan.into()),
120            extra: Some(AnsiColor::BrightMagenta.into()),
121            error: Some(AnsiColor::Red.into()),
122            missing: Some(Self::ORANGE),
123            line_feed: Some(Self::LIGHT_GRAY),
124            backtick: Some(Self::GREEN),
125            literal: Some(Self::YELLOW),
126        }
127    }
128}
129
130#[derive(Debug, Copy, Clone, Deserialize, Serialize)]
131pub struct Rgb(pub u8, pub u8, pub u8);
132
133impl From<Rgb> for RgbColor {
134    fn from(val: Rgb) -> Self {
135        Self(val.0, val.1, val.2)
136    }
137}
138
139#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
140#[serde(rename_all = "kebab-case")]
141pub struct Config {
142    pub parse_theme: Option<ParseThemeRaw>,
143}
144
145#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
146#[serde(rename_all = "kebab-case")]
147pub struct ParseThemeRaw {
148    pub node_kind: Option<Rgb>,
149    pub node_text: Option<Rgb>,
150    pub field: Option<Rgb>,
151    pub row_color: Option<Rgb>,
152    pub row_color_named: Option<Rgb>,
153    pub extra: Option<Rgb>,
154    pub error: Option<Rgb>,
155    pub missing: Option<Rgb>,
156    pub line_feed: Option<Rgb>,
157    pub backtick: Option<Rgb>,
158    pub literal: Option<Rgb>,
159}
160
161impl From<ParseThemeRaw> for ParseTheme {
162    fn from(value: ParseThemeRaw) -> Self {
163        let val_or_default = |val: Option<Rgb>, default: Option<Color>| -> Option<Color> {
164            val.map_or(default, |v| Some(Color::Rgb(v.into())))
165        };
166        let default = Self::default();
167
168        Self {
169            node_kind: val_or_default(value.node_kind, default.node_kind),
170            node_text: val_or_default(value.node_text, default.node_text),
171            field: val_or_default(value.field, default.field),
172            row_color: val_or_default(value.row_color, default.row_color),
173            row_color_named: val_or_default(value.row_color_named, default.row_color_named),
174            extra: val_or_default(value.extra, default.extra),
175            error: val_or_default(value.error, default.error),
176            missing: val_or_default(value.missing, default.missing),
177            line_feed: val_or_default(value.line_feed, default.line_feed),
178            backtick: val_or_default(value.backtick, default.backtick),
179            literal: val_or_default(value.literal, default.literal),
180        }
181    }
182}
183
/// Selects how `parse_file_at_path` prints a parsed tree.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum ParseOutput {
    /// Indented, parenthesized listing of the named nodes
    Normal,
    /// None of the tree printers in `parse_file_at_path` runs
    Quiet,
    /// XML document with one element per named node
    Xml,
    /// Concrete syntax tree rendered by `render_cst`
    Cst,
    /// Tree graph produced by `util::print_tree_graph`
    Dot,
}
192
193/// A position in a multi-line text document, in terms of rows and columns.
194///
195/// Rows and columns are zero-based.
196///
197/// This serves as a serializable wrapper for `Point`
198#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
199pub struct ParsePoint {
200    pub row: usize,
201    pub column: usize,
202}
203
204impl From<Point> for ParsePoint {
205    fn from(value: Point) -> Self {
206        Self {
207            row: value.row,
208            column: value.column,
209        }
210    }
211}
212
213#[derive(Serialize, Default, Debug, Clone)]
214pub struct ParseSummary {
215    pub file: PathBuf,
216    pub successful: bool,
217    pub start: Option<ParsePoint>,
218    pub end: Option<ParsePoint>,
219    pub duration: Option<Duration>,
220    pub bytes: Option<usize>,
221}
222
223impl ParseSummary {
224    #[must_use]
225    pub fn new(path: &Path) -> Self {
226        Self {
227            file: path.to_path_buf(),
228            successful: false,
229            ..Default::default()
230        }
231    }
232}
233
234#[derive(Serialize, Debug)]
235pub struct ParseStats {
236    pub parse_summaries: Vec<ParseSummary>,
237    pub cumulative_stats: Stats,
238    pub source_count: usize,
239}
240
241impl Default for ParseStats {
242    fn default() -> Self {
243        Self {
244            parse_summaries: Vec::new(),
245            cumulative_stats: Stats::default(),
246            source_count: 1,
247        }
248    }
249}
250
251#[derive(Serialize, ValueEnum, Debug, Copy, Clone, Default, Eq, PartialEq)]
252pub enum ParseDebugType {
253    #[default]
254    Quiet,
255    Normal,
256    Pretty,
257}
258
259pub struct ParseFileOptions<'a> {
260    pub edits: &'a [&'a str],
261    pub output: ParseOutput,
262    pub stats: &'a mut ParseStats,
263    pub print_time: bool,
264    pub timeout: u64,
265    pub debug: ParseDebugType,
266    pub debug_graph: bool,
267    pub cancellation_flag: Option<&'a AtomicUsize>,
268    pub encoding: Option<u32>,
269    pub open_log: bool,
270    pub no_ranges: bool,
271    pub parse_theme: &'a ParseTheme,
272}
273
/// Compact outcome of a single parse.
// NOTE(review): nothing in the visible part of this file constructs this
// type; the field descriptions below follow the field names and should be
// confirmed against the callers.
#[derive(Copy, Clone)]
pub struct ParseResult {
    /// Whether the parse succeeded
    pub successful: bool,
    /// Number of bytes involved in the parse
    pub bytes: usize,
    /// Duration of the parse, if one was measured
    pub duration: Option<Duration>,
}
280
281pub fn parse_file_at_path(
282    parser: &mut Parser,
283    language: &Language,
284    path: &Path,
285    name: &str,
286    max_path_length: usize,
287    opts: &mut ParseFileOptions,
288) -> Result<()> {
289    let mut _log_session = None;
290    parser.set_language(language)?;
291    let mut source_code = fs::read(path).with_context(|| format!("Error reading {name:?}"))?;
292
293    // Render an HTML graph if `--debug-graph` was passed
294    if opts.debug_graph {
295        _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?);
296    }
297    // Log to stderr if `--debug` was passed
298    else if opts.debug != ParseDebugType::Quiet {
299        let mut curr_version: usize = 0;
300        let use_color = std::env::var("NO_COLOR").map_or(true, |v| v != "1");
301        let debug = opts.debug;
302        parser.set_logger(Some(Box::new(move |log_type, message| {
303            if debug == ParseDebugType::Normal {
304                if log_type == LogType::Lex {
305                    write!(&mut io::stderr(), "  ").unwrap();
306                }
307                writeln!(&mut io::stderr(), "{message}").unwrap();
308            } else {
309                let colors = &[
310                    AnsiColor::White,
311                    AnsiColor::Red,
312                    AnsiColor::Blue,
313                    AnsiColor::Green,
314                    AnsiColor::Cyan,
315                    AnsiColor::Yellow,
316                ];
317                if message.starts_with("process version:") {
318                    let comma_idx = message.find(',').unwrap();
319                    curr_version = message["process version:".len()..comma_idx]
320                        .parse()
321                        .unwrap();
322                }
323                let color = if use_color {
324                    Some(colors[curr_version])
325                } else {
326                    None
327                };
328                let mut out = if log_type == LogType::Lex {
329                    "  ".to_string()
330                } else {
331                    String::new()
332                };
333                out += &paint(color, message);
334                writeln!(&mut io::stderr(), "{out}").unwrap();
335            }
336        })));
337    }
338
339    let parse_time = Instant::now();
340
341    #[inline(always)]
342    fn is_utf16_le_bom(bom_bytes: &[u8]) -> bool {
343        bom_bytes == [0xFF, 0xFE]
344    }
345
346    #[inline(always)]
347    fn is_utf16_be_bom(bom_bytes: &[u8]) -> bool {
348        bom_bytes == [0xFE, 0xFF]
349    }
350
351    let encoding = match opts.encoding {
352        None if source_code.len() >= 2 => {
353            if is_utf16_le_bom(&source_code[0..2]) {
354                Some(ffi::TSInputEncodingUTF16LE)
355            } else if is_utf16_be_bom(&source_code[0..2]) {
356                Some(ffi::TSInputEncodingUTF16BE)
357            } else {
358                None
359            }
360        }
361        _ => opts.encoding,
362    };
363
364    // If the `--cancel` flag was passed, then cancel the parse
365    // when the user types a newline.
366    //
367    // Additionally, if the `--time` flag was passed, end the parse
368    // after the specified number of microseconds.
369    let start_time = Instant::now();
370    let progress_callback = &mut |_: &ParseState| {
371        if let Some(cancellation_flag) = opts.cancellation_flag {
372            if cancellation_flag.load(Ordering::SeqCst) != 0 {
373                return ControlFlow::Break(());
374            }
375        }
376
377        if opts.timeout > 0 && start_time.elapsed().as_micros() > opts.timeout as u128 {
378            return ControlFlow::Break(());
379        }
380
381        ControlFlow::Continue(())
382    };
383
384    let parse_opts = ParseOptions::new().progress_callback(progress_callback);
385
386    let tree = match encoding {
387        Some(encoding) if encoding == ffi::TSInputEncodingUTF16LE => {
388            let source_code_utf16 = source_code
389                .chunks_exact(2)
390                .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
391                .collect::<Vec<_>>();
392            parser.parse_utf16_le_with_options(
393                &mut |i, _| {
394                    if i < source_code_utf16.len() {
395                        &source_code_utf16[i..]
396                    } else {
397                        &[]
398                    }
399                },
400                None,
401                Some(parse_opts),
402            )
403        }
404        Some(encoding) if encoding == ffi::TSInputEncodingUTF16BE => {
405            let source_code_utf16 = source_code
406                .chunks_exact(2)
407                .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]]))
408                .collect::<Vec<_>>();
409            parser.parse_utf16_be_with_options(
410                &mut |i, _| {
411                    if i < source_code_utf16.len() {
412                        &source_code_utf16[i..]
413                    } else {
414                        &[]
415                    }
416                },
417                None,
418                Some(parse_opts),
419            )
420        }
421        _ => parser.parse_with_options(
422            &mut |i, _| {
423                if i < source_code.len() {
424                    &source_code[i..]
425                } else {
426                    &[]
427                }
428            },
429            None,
430            Some(parse_opts),
431        ),
432    };
433    let parse_duration = parse_time.elapsed();
434
435    let stdout = io::stdout();
436    let mut stdout = stdout.lock();
437
438    if let Some(mut tree) = tree {
439        if opts.debug_graph && !opts.edits.is_empty() {
440            info!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
441        }
442
443        let edit_time = Instant::now();
444        for (i, edit) in opts.edits.iter().enumerate() {
445            let edit = parse_edit_flag(&source_code, edit)?;
446            perform_edit(&mut tree, &mut source_code, &edit)?;
447            tree = parser.parse(&source_code, Some(&tree)).unwrap();
448
449            if opts.debug_graph {
450                info!("AFTER {i}:\n{}", String::from_utf8_lossy(&source_code));
451            }
452        }
453        let edit_duration = edit_time.elapsed();
454
455        parser.stop_printing_dot_graphs();
456
457        let parse_duration_ms = parse_duration.as_micros() as f64 / 1e3;
458        let edit_duration_ms = edit_duration.as_micros() as f64 / 1e3;
459        let mut cursor = tree.walk();
460
461        if opts.output == ParseOutput::Normal {
462            let mut needs_newline = false;
463            let mut indent_level = 0;
464            let mut did_visit_children = false;
465            loop {
466                let node = cursor.node();
467                let is_named = node.is_named();
468                if did_visit_children {
469                    if is_named {
470                        stdout.write_all(b")")?;
471                        needs_newline = true;
472                    }
473                    if cursor.goto_next_sibling() {
474                        did_visit_children = false;
475                    } else if cursor.goto_parent() {
476                        did_visit_children = true;
477                        indent_level -= 1;
478                    } else {
479                        break;
480                    }
481                } else {
482                    if is_named {
483                        if needs_newline {
484                            stdout.write_all(b"\n")?;
485                        }
486                        for _ in 0..indent_level {
487                            stdout.write_all(b"  ")?;
488                        }
489                        let start = node.start_position();
490                        let end = node.end_position();
491                        if let Some(field_name) = cursor.field_name() {
492                            write!(&mut stdout, "{field_name}: ")?;
493                        }
494                        write!(&mut stdout, "({}", node.kind())?;
495                        if !opts.no_ranges {
496                            write!(
497                                &mut stdout,
498                                " [{}, {}] - [{}, {}]",
499                                start.row, start.column, end.row, end.column
500                            )?;
501                        }
502                        needs_newline = true;
503                    }
504                    if cursor.goto_first_child() {
505                        did_visit_children = false;
506                        indent_level += 1;
507                    } else {
508                        did_visit_children = true;
509                    }
510                }
511            }
512            cursor.reset(tree.root_node());
513            println!();
514        }
515
516        if opts.output == ParseOutput::Cst {
517            render_cst(&source_code, &tree, &mut cursor, opts, &mut stdout)?;
518        }
519
520        if opts.output == ParseOutput::Xml {
521            let mut needs_newline = false;
522            let mut indent_level = 2;
523            let mut did_visit_children = false;
524            let mut had_named_children = false;
525            let mut tags = Vec::<&str>::new();
526
527            // If we're parsing the first file, write the header
528            if opts.stats.parse_summaries.is_empty() {
529                writeln!(&mut stdout, "<?xml version=\"1.0\"?>")?;
530                writeln!(&mut stdout, "<sources>")?;
531            }
532            writeln!(&mut stdout, "  <source name=\"{}\">", path.display())?;
533
534            loop {
535                let node = cursor.node();
536                let is_named = node.is_named();
537                if did_visit_children {
538                    if is_named {
539                        let tag = tags.pop();
540                        if had_named_children {
541                            for _ in 0..indent_level {
542                                stdout.write_all(b"  ")?;
543                            }
544                        }
545                        write!(&mut stdout, "</{}>", tag.expect("there is a tag"))?;
546                        // we only write a line in the case where it's the last sibling
547                        if let Some(parent) = node.parent() {
548                            if parent.child(parent.child_count() as u32 - 1).unwrap() == node {
549                                stdout.write_all(b"\n")?;
550                            }
551                        }
552                        needs_newline = true;
553                    }
554                    if cursor.goto_next_sibling() {
555                        did_visit_children = false;
556                        had_named_children = false;
557                    } else if cursor.goto_parent() {
558                        did_visit_children = true;
559                        had_named_children = is_named;
560                        indent_level -= 1;
561                        if !is_named && needs_newline {
562                            stdout.write_all(b"\n")?;
563                            for _ in 0..indent_level {
564                                stdout.write_all(b"  ")?;
565                            }
566                        }
567                    } else {
568                        break;
569                    }
570                } else {
571                    if is_named {
572                        if needs_newline {
573                            stdout.write_all(b"\n")?;
574                        }
575                        for _ in 0..indent_level {
576                            stdout.write_all(b"  ")?;
577                        }
578                        write!(&mut stdout, "<{}", node.kind())?;
579                        if let Some(field_name) = cursor.field_name() {
580                            write!(&mut stdout, " field=\"{field_name}\"")?;
581                        }
582                        let start = node.start_position();
583                        let end = node.end_position();
584                        write!(&mut stdout, " srow=\"{}\"", start.row)?;
585                        write!(&mut stdout, " scol=\"{}\"", start.column)?;
586                        write!(&mut stdout, " erow=\"{}\"", end.row)?;
587                        write!(&mut stdout, " ecol=\"{}\"", end.column)?;
588                        write!(&mut stdout, ">")?;
589                        tags.push(node.kind());
590                        needs_newline = true;
591                    }
592                    if cursor.goto_first_child() {
593                        did_visit_children = false;
594                        had_named_children = false;
595                        indent_level += 1;
596                    } else {
597                        did_visit_children = true;
598                        let start = node.start_byte();
599                        let end = node.end_byte();
600                        let value =
601                            std::str::from_utf8(&source_code[start..end]).expect("has a string");
602                        if !is_named && needs_newline {
603                            stdout.write_all(b"\n")?;
604                            for _ in 0..indent_level {
605                                stdout.write_all(b"  ")?;
606                            }
607                        }
608                        write!(&mut stdout, "{}", html_escape::encode_text(value))?;
609                    }
610                }
611            }
612            writeln!(&mut stdout)?;
613            writeln!(&mut stdout, "  </source>")?;
614
615            // If we parsed the last file, write the closing tag for the `sources` header
616            if opts.stats.parse_summaries.len() == opts.stats.source_count - 1 {
617                writeln!(&mut stdout, "</sources>")?;
618            }
619            cursor.reset(tree.root_node());
620        }
621
622        if opts.output == ParseOutput::Dot {
623            util::print_tree_graph(&tree, "log.html", opts.open_log).unwrap();
624        }
625
626        let mut first_error = None;
627        let mut earliest_node_with_error = None;
628        'outer: loop {
629            let node = cursor.node();
630            if node.has_error() {
631                if earliest_node_with_error.is_none() {
632                    earliest_node_with_error = Some(node);
633                }
634                if node.is_error() || node.is_missing() {
635                    first_error = Some(node);
636                    break;
637                }
638
639                // If there's no more children, even though some outer node has an error,
640                // then that means that the first error is hidden, but the later error could be
641                // visible. So, we walk back up to the child of the first node with an error,
642                // and then check its siblings for errors.
643                if !cursor.goto_first_child() {
644                    let earliest = earliest_node_with_error.unwrap();
645                    while cursor.goto_parent() {
646                        if cursor.node().parent().is_some_and(|p| p == earliest) {
647                            while cursor.goto_next_sibling() {
648                                let sibling = cursor.node();
649                                if sibling.is_error() || sibling.is_missing() {
650                                    first_error = Some(sibling);
651                                    break 'outer;
652                                }
653                                if sibling.has_error() && cursor.goto_first_child() {
654                                    continue 'outer;
655                                }
656                            }
657                            break;
658                        }
659                    }
660                    break;
661                }
662            } else if !cursor.goto_next_sibling() {
663                break;
664            }
665        }
666
667        if first_error.is_some() || opts.print_time {
668            let path = path.to_string_lossy();
669            write!(
670                &mut stdout,
671                "{:width$}\tParse: {parse_duration_ms:>7.2} ms\t{:>6} bytes/ms",
672                name,
673                (source_code.len() as u128 * 1_000_000) / parse_duration.as_nanos(),
674                width = max_path_length
675            )?;
676            if let Some(node) = first_error {
677                let node_kind = node.kind();
678                let mut node_text = String::with_capacity(node_kind.len());
679                for c in node_kind.chars() {
680                    if let Some(escaped) = escape_invisible(c) {
681                        node_text += escaped;
682                    } else {
683                        node_text.push(c);
684                    }
685                }
686                write!(&mut stdout, "\t(")?;
687                if node.is_missing() {
688                    if node.is_named() {
689                        write!(&mut stdout, "MISSING {node_text}")?;
690                    } else {
691                        write!(&mut stdout, "MISSING \"{node_text}\"")?;
692                    }
693                } else {
694                    write!(&mut stdout, "{node_text}")?;
695                }
696
697                let start = node.start_position();
698                let end = node.end_position();
699                write!(
700                    &mut stdout,
701                    " [{}, {}] - [{}, {}])",
702                    start.row, start.column, end.row, end.column
703                )?;
704            }
705            if !opts.edits.is_empty() {
706                write!(
707                    &mut stdout,
708                    "\n{:width$}\tEdit:  {edit_duration_ms:>7.2} ms",
709                    " ".repeat(path.len()),
710                    width = max_path_length,
711                )?;
712            }
713            writeln!(&mut stdout)?;
714        }
715
716        opts.stats.parse_summaries.push(ParseSummary {
717            file: path.to_path_buf(),
718            successful: first_error.is_none(),
719            start: Some(tree.root_node().start_position().into()),
720            end: Some(tree.root_node().end_position().into()),
721            duration: Some(parse_duration),
722            bytes: Some(source_code.len()),
723        });
724
725        return Ok(());
726    }
727    parser.stop_printing_dot_graphs();
728
729    if opts.print_time {
730        let duration = parse_time.elapsed();
731        let duration_ms = duration.as_micros() as f64 / 1e3;
732        writeln!(
733            &mut stdout,
734            "{:width$}\tParse: {duration_ms:>7.2} ms\t(timed out)",
735            path.to_str().unwrap(),
736            width = max_path_length
737        )?;
738    }
739
740    opts.stats.parse_summaries.push(ParseSummary {
741        file: path.to_path_buf(),
742        successful: false,
743        start: None,
744        end: None,
745        duration: None,
746        bytes: Some(source_code.len()),
747    });
748
749    Ok(())
750}
751
/// Returns the backslash escape sequence for an invisible character (newline,
/// carriage return, tab, NUL, vertical tab, form feed) or for the backslash
/// itself; returns `None` for every other character.
const fn escape_invisible(c: char) -> Option<&'static str> {
    match c {
        '\0' => Some("\\0"),
        '\t' => Some("\\t"),
        '\n' => Some("\\n"),
        '\x0b' => Some("\\v"),
        '\x0c' => Some("\\f"),
        '\r' => Some("\\r"),
        '\\' => Some("\\\\"),
        _ => None,
    }
}
764
/// Returns the backslash-escaped form of a quoting character (a backtick or a
/// double quote); returns `None` for every other character.
const fn escape_delimiter(c: char) -> Option<&'static str> {
    match c {
        '\"' => Some("\\\""),
        '`' => Some("\\`"),
        _ => None,
    }
}
772
773pub fn render_cst<'a, 'b: 'a>(
774    source_code: &[u8],
775    tree: &'b Tree,
776    cursor: &mut TreeCursor<'a>,
777    opts: &ParseFileOptions,
778    out: &mut impl Write,
779) -> Result<()> {
780    let lossy_source_code = String::from_utf8_lossy(source_code);
781    let total_width = lossy_source_code
782        .lines()
783        .enumerate()
784        .map(|(row, col)| (row as f64).log10() as usize + (col.len() as f64).log10() as usize + 1)
785        .max()
786        .unwrap_or(1);
787    let mut indent_level = usize::from(!opts.no_ranges);
788    let mut did_visit_children = false;
789    let mut in_error = false;
790    loop {
791        if did_visit_children {
792            if cursor.goto_next_sibling() {
793                did_visit_children = false;
794            } else if cursor.goto_parent() {
795                did_visit_children = true;
796                indent_level -= 1;
797                if !cursor.node().has_error() {
798                    in_error = false;
799                }
800            } else {
801                break;
802            }
803        } else {
804            cst_render_node(
805                opts,
806                cursor,
807                source_code,
808                out,
809                total_width,
810                indent_level,
811                in_error,
812            )?;
813            if cursor.goto_first_child() {
814                did_visit_children = false;
815                indent_level += 1;
816                if cursor.node().has_error() {
817                    in_error = true;
818                }
819            } else {
820                did_visit_children = true;
821            }
822        }
823    }
824    cursor.reset(tree.root_node());
825    Ok(())
826}
827
828fn render_node_text(source: &str) -> String {
829    source
830        .chars()
831        .fold(String::with_capacity(source.len()), |mut acc, c| {
832            if let Some(esc) = escape_invisible(c) {
833                acc.push_str(esc);
834            } else if let Some(esc) = escape_delimiter(c) {
835                acc.push_str(esc);
836            } else {
837                acc.push(c);
838            }
839            acc
840        })
841}
842
843fn write_node_text(
844    opts: &ParseFileOptions,
845    out: &mut impl Write,
846    cursor: &TreeCursor,
847    is_named: bool,
848    source: &str,
849    color: Option<impl Into<Color> + Copy>,
850    text_info: (usize, usize),
851) -> Result<()> {
852    let (total_width, indent_level) = text_info;
853    let (quote, quote_color) = if is_named {
854        ('`', opts.parse_theme.backtick)
855    } else {
856        ('\"', color.map(|c| c.into()))
857    };
858
859    if !is_named {
860        write!(
861            out,
862            "{}{}{}",
863            paint(quote_color, &String::from(quote)),
864            paint(color, &render_node_text(source)),
865            paint(quote_color, &String::from(quote)),
866        )?;
867    } else {
868        let multiline = source.contains('\n');
869        for (i, line) in source.split_inclusive('\n').enumerate() {
870            if line.is_empty() {
871                break;
872            }
873            let mut node_range = cursor.node().range();
874            // For each line of text, adjust the row by shifting it down `i` rows,
875            // and adjust the column by setting it to the length of *this* line.
876            node_range.start_point.row += i;
877            node_range.end_point.row = node_range.start_point.row;
878            node_range.end_point.column = line.len()
879                + if i == 0 {
880                    node_range.start_point.column
881                } else {
882                    0
883                };
884            let formatted_line = render_line_feed(line, opts);
885            write!(
886                out,
887                "{}{}{}{}{}{}",
888                if multiline { "\n" } else { " " },
889                if multiline && !opts.no_ranges {
890                    render_node_range(opts, cursor, is_named, true, total_width, node_range)
891                } else {
892                    String::new()
893                },
894                if multiline {
895                    "  ".repeat(indent_level + 1)
896                } else {
897                    String::new()
898                },
899                paint(quote_color, &String::from(quote)),
900                paint(color, &render_node_text(&formatted_line)),
901                paint(quote_color, &String::from(quote)),
902            )?;
903        }
904    }
905
906    Ok(())
907}
908
909fn render_line_feed(source: &str, opts: &ParseFileOptions) -> String {
910    if cfg!(windows) {
911        source.replace("\r\n", &paint(opts.parse_theme.line_feed, "\r\n"))
912    } else {
913        source.replace('\n', &paint(opts.parse_theme.line_feed, "\n"))
914    }
915}
916
917fn render_node_range(
918    opts: &ParseFileOptions,
919    cursor: &TreeCursor,
920    is_named: bool,
921    is_multiline: bool,
922    total_width: usize,
923    range: Range,
924) -> String {
925    let has_field_name = cursor.field_name().is_some();
926    let range_color = if is_named && !is_multiline && !has_field_name {
927        opts.parse_theme.row_color_named
928    } else {
929        opts.parse_theme.row_color
930    };
931
932    let remaining_width_start = (total_width
933        - (range.start_point.row as f64).log10() as usize
934        - (range.start_point.column as f64).log10() as usize)
935        .max(1);
936    let remaining_width_end = (total_width
937        - (range.end_point.row as f64).log10() as usize
938        - (range.end_point.column as f64).log10() as usize)
939        .max(1);
940    paint(
941        range_color,
942        &format!(
943            "{}:{}{:remaining_width_start$}- {}:{}{:remaining_width_end$}",
944            range.start_point.row,
945            range.start_point.column,
946            ' ',
947            range.end_point.row,
948            range.end_point.column,
949            ' ',
950        ),
951    )
952}
953
954fn cst_render_node(
955    opts: &ParseFileOptions,
956    cursor: &mut TreeCursor,
957    source_code: &[u8],
958    out: &mut impl Write,
959    total_width: usize,
960    indent_level: usize,
961    in_error: bool,
962) -> Result<()> {
963    let node = cursor.node();
964    let is_named = node.is_named();
965    if !opts.no_ranges {
966        write!(
967            out,
968            "{}",
969            render_node_range(opts, cursor, is_named, false, total_width, node.range())
970        )?;
971    }
972    write!(
973        out,
974        "{}{}",
975        "  ".repeat(indent_level),
976        if in_error && !node.has_error() {
977            " "
978        } else {
979            ""
980        }
981    )?;
982    if is_named {
983        if let Some(field_name) = cursor.field_name() {
984            write!(
985                out,
986                "{}",
987                paint(opts.parse_theme.field, &format!("{field_name}: "))
988            )?;
989        }
990
991        if node.has_error() || node.is_error() {
992            write!(out, "{}", paint(opts.parse_theme.error, "•"))?;
993        }
994
995        let kind_color = if node.is_error() {
996            opts.parse_theme.error
997        } else if node.is_extra() || node.parent().is_some_and(|p| p.is_extra() && !p.is_error()) {
998            opts.parse_theme.extra
999        } else {
1000            opts.parse_theme.node_kind
1001        };
1002        write!(out, "{}", paint(kind_color, node.kind()))?;
1003
1004        if node.child_count() == 0 {
1005            // Node text from a pattern or external scanner
1006            write_node_text(
1007                opts,
1008                out,
1009                cursor,
1010                is_named,
1011                &String::from_utf8_lossy(&source_code[node.start_byte()..node.end_byte()]),
1012                opts.parse_theme.node_text,
1013                (total_width, indent_level),
1014            )?;
1015        }
1016    } else if node.is_missing() {
1017        write!(out, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?;
1018        write!(out, "\"{}\"", paint(opts.parse_theme.missing, node.kind()))?;
1019    } else {
1020        // Terminal literals, like "fn"
1021        write_node_text(
1022            opts,
1023            out,
1024            cursor,
1025            is_named,
1026            node.kind(),
1027            opts.parse_theme.literal,
1028            (total_width, indent_level),
1029        )?;
1030    }
1031    writeln!(out)?;
1032
1033    Ok(())
1034}
1035
1036pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
1037    let start_byte = edit.position;
1038    let old_end_byte = edit.position + edit.deleted_length;
1039    let new_end_byte = edit.position + edit.inserted_text.len();
1040    let start_position = position_for_offset(input, start_byte)?;
1041    let old_end_position = position_for_offset(input, old_end_byte)?;
1042    input.splice(start_byte..old_end_byte, edit.inserted_text.iter().copied());
1043    let new_end_position = position_for_offset(input, new_end_byte)?;
1044    let edit = InputEdit {
1045        start_byte,
1046        old_end_byte,
1047        new_end_byte,
1048        start_position,
1049        old_end_position,
1050        new_end_position,
1051    };
1052    tree.edit(&edit);
1053    Ok(edit)
1054}
1055
1056fn parse_edit_flag(source_code: &[u8], flag: &str) -> Result<Edit> {
1057    let error = || {
1058        anyhow!(concat!(
1059            "Invalid edit string '{}'. ",
1060            "Edit strings must match the pattern '<START_BYTE_OR_POSITION> <REMOVED_LENGTH> <NEW_TEXT>'"
1061        ), flag)
1062    };
1063
1064    // Three whitespace-separated parts:
1065    // * edit position
1066    // * deleted length
1067    // * inserted text
1068    let mut parts = flag.split(' ');
1069    let position = parts.next().ok_or_else(error)?;
1070    let deleted_length = parts.next().ok_or_else(error)?;
1071    let inserted_text = parts.collect::<Vec<_>>().join(" ").into_bytes();
1072
1073    // Position can either be a byte_offset or row,column pair, separated by a comma
1074    let position = if position == "$" {
1075        source_code.len()
1076    } else if position.contains(',') {
1077        let mut parts = position.split(',');
1078        let row = parts.next().ok_or_else(error)?;
1079        let row = row.parse::<usize>().map_err(|_| error())?;
1080        let column = parts.next().ok_or_else(error)?;
1081        let column = column.parse::<usize>().map_err(|_| error())?;
1082        offset_for_position(source_code, Point { row, column })?
1083    } else {
1084        position.parse::<usize>().map_err(|_| error())?
1085    };
1086
1087    // Deleted length must be a byte count.
1088    let deleted_length = deleted_length.parse::<usize>().map_err(|_| error())?;
1089
1090    Ok(Edit {
1091        position,
1092        deleted_length,
1093        inserted_text,
1094    })
1095}
1096
1097pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
1098    let mut row = 0;
1099    let mut offset = 0;
1100    let mut iter = memchr::memchr_iter(b'\n', input);
1101    loop {
1102        if let Some(pos) = iter.next() {
1103            if row < position.row {
1104                row += 1;
1105                offset = pos;
1106                continue;
1107            }
1108        }
1109        offset += 1;
1110        break;
1111    }
1112    if position.row - row > 0 {
1113        return Err(anyhow!("Failed to address a row: {}", position.row));
1114    }
1115    if let Some(pos) = iter.next() {
1116        if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
1117            return Err(anyhow!("Failed to address a column: {}", position.column));
1118        }
1119    } else if input.len() - offset < position.column {
1120        return Err(anyhow!("Failed to address a column over the end"));
1121    }
1122    Ok(offset + position.column)
1123}
1124
1125pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
1126    if offset > input.len() {
1127        return Err(anyhow!("Failed to address an offset: {offset}"));
1128    }
1129    let mut result = Point { row: 0, column: 0 };
1130    let mut last = 0;
1131    for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
1132        result.row += 1;
1133        last = pos;
1134    }
1135    result.column = if result.row > 0 {
1136        offset - last - 1
1137    } else {
1138        offset
1139    };
1140    Ok(result)
1141}