tree_sitter_cli/
parse.rs

1use std::{
2    fmt, fs,
3    io::{self, Write},
4    ops::ControlFlow,
5    path::{Path, PathBuf},
6    sync::atomic::{AtomicUsize, Ordering},
7    time::{Duration, Instant},
8};
9
10use anstyle::{AnsiColor, Color, RgbColor};
11use anyhow::{anyhow, Context, Result};
12use clap::ValueEnum;
13use log::info;
14use schemars::JsonSchema;
15use serde::{Deserialize, Serialize};
16use tree_sitter::{
17    ffi, InputEdit, Language, LogType, ParseOptions, ParseState, Parser, Point, Range, Tree,
18    TreeCursor,
19};
20
21use crate::{fuzz::edits::Edit, logger::paint, util};
22
// Aggregate counters over a batch of parses.
//
// NOTE: plain `//` comments are used on purpose here; the `JsonSchema` derive may
// pick up `///` doc comments as schema descriptions, which would change the
// generated schema.
#[derive(Debug, Default, Serialize, JsonSchema)]
pub struct Stats {
    // Number of parses counted as successful.
    pub successful_parses: usize,
    // Number of parses attempted; the `Display` impl reports
    // `total_parses - successful_parses` as the failure count.
    pub total_parses: usize,
    // Number of bytes parsed; divided by `total_duration` to report bytes/ms.
    pub total_bytes: usize,
    // Accumulated time attributed to the parses counted above.
    pub total_duration: Duration,
}
30
31impl fmt::Display for Stats {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        let duration_us = self.total_duration.as_micros();
34        let success_rate = if self.total_parses > 0 {
35            format!(
36                "{:.2}%",
37                ((self.successful_parses as f64) / (self.total_parses as f64)) * 100.0,
38            )
39        } else {
40            "N/A".to_string()
41        };
42        let duration_str = match (self.total_parses, duration_us) {
43            (0, _) => "N/A".to_string(),
44            (_, 0) => "0 bytes/ms".to_string(),
45            (_, _) => format!(
46                "{} bytes/ms",
47                ((self.total_bytes as u128) * 1_000) / duration_us
48            ),
49        };
50        writeln!(
51            f,
52            "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {success_rate}; average speed: {duration_str}",
53            self.total_parses,
54            self.successful_parses,
55            self.total_parses - self.successful_parses,
56        )
57    }
58}
59
/// Sets the color used in the output of `tree-sitter parse --cst`
///
/// Every entry is optional: [`ParseTheme::empty`] leaves all of them unset,
/// while the `Default` impl fills in the built-in palette.
#[derive(Debug, Copy, Clone)]
pub struct ParseTheme {
    /// The color of node kinds
    pub node_kind: Option<Color>,
    /// The color of text associated with a node
    pub node_text: Option<Color>,
    /// The color of node fields
    pub field: Option<Color>,
    /// The color of the range information for unnamed nodes
    pub row_color: Option<Color>,
    /// The color of the range information for named nodes
    pub row_color_named: Option<Color>,
    /// The color of extra nodes
    pub extra: Option<Color>,
    /// The color of ERROR nodes
    pub error: Option<Color>,
    /// The color of MISSING nodes and their associated text
    pub missing: Option<Color>,
    /// The color of newline characters
    pub line_feed: Option<Color>,
    /// The color of backticks
    pub backtick: Option<Color>,
    /// The color of literals
    pub literal: Option<Color>,
}
86
impl ParseTheme {
    // RGB values backing the default theme (see the `Default` impl).
    const GRAY: Color = Color::Rgb(RgbColor(118, 118, 118));
    const LIGHT_GRAY: Color = Color::Rgb(RgbColor(166, 172, 181));
    const ORANGE: Color = Color::Rgb(RgbColor(255, 153, 51));
    const YELLOW: Color = Color::Rgb(RgbColor(219, 219, 173));
    const GREEN: Color = Color::Rgb(RgbColor(101, 192, 67));

    /// Returns a theme in which every color is unset (`None`).
    ///
    /// Unlike `Default::default()`, this is a `const fn`, so it can be used
    /// in constant contexts.
    #[must_use]
    pub const fn empty() -> Self {
        Self {
            node_kind: None,
            node_text: None,
            field: None,
            row_color: None,
            row_color_named: None,
            extra: None,
            error: None,
            missing: None,
            line_feed: None,
            backtick: None,
            literal: None,
        }
    }
}
111
112impl Default for ParseTheme {
113    fn default() -> Self {
114        Self {
115            node_kind: Some(AnsiColor::BrightCyan.into()),
116            node_text: Some(Self::GRAY),
117            field: Some(AnsiColor::Blue.into()),
118            row_color: Some(AnsiColor::White.into()),
119            row_color_named: Some(AnsiColor::BrightCyan.into()),
120            extra: Some(AnsiColor::BrightMagenta.into()),
121            error: Some(AnsiColor::Red.into()),
122            missing: Some(Self::ORANGE),
123            line_feed: Some(Self::LIGHT_GRAY),
124            backtick: Some(Self::GREEN),
125            literal: Some(Self::YELLOW),
126        }
127    }
128}
129
/// A (de)serializable RGB triple; the three `u8` fields are passed, in
/// order, to `RgbColor` by the `From<Rgb>` conversion.
#[derive(Debug, Copy, Clone, Deserialize, Serialize)]
pub struct Rgb(pub u8, pub u8, pub u8);
132
133impl From<Rgb> for RgbColor {
134    fn from(val: Rgb) -> Self {
135        Self(val.0, val.1, val.2)
136    }
137}
138
/// Configuration section read for the parse command.
///
/// Field names are (de)serialized in kebab-case, so `parse_theme` appears
/// as `parse-theme`.
#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct Config {
    /// Optional user-supplied color overrides for the CST theme.
    pub parse_theme: Option<ParseThemeRaw>,
}
144
/// Serializable counterpart of [`ParseTheme`] in which every color is an
/// optional [`Rgb`] triple.
///
/// Field names are (de)serialized in kebab-case (e.g. `node-kind`). When
/// converted into a [`ParseTheme`], any field left as `None` falls back to
/// the corresponding default theme color.
#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct ParseThemeRaw {
    pub node_kind: Option<Rgb>,
    pub node_text: Option<Rgb>,
    pub field: Option<Rgb>,
    pub row_color: Option<Rgb>,
    pub row_color_named: Option<Rgb>,
    pub extra: Option<Rgb>,
    pub error: Option<Rgb>,
    pub missing: Option<Rgb>,
    pub line_feed: Option<Rgb>,
    pub backtick: Option<Rgb>,
    pub literal: Option<Rgb>,
}
160
161impl From<ParseThemeRaw> for ParseTheme {
162    fn from(value: ParseThemeRaw) -> Self {
163        let val_or_default = |val: Option<Rgb>, default: Option<Color>| -> Option<Color> {
164            val.map_or(default, |v| Some(Color::Rgb(v.into())))
165        };
166        let default = Self::default();
167
168        Self {
169            node_kind: val_or_default(value.node_kind, default.node_kind),
170            node_text: val_or_default(value.node_text, default.node_text),
171            field: val_or_default(value.field, default.field),
172            row_color: val_or_default(value.row_color, default.row_color),
173            row_color_named: val_or_default(value.row_color_named, default.row_color_named),
174            extra: val_or_default(value.extra, default.extra),
175            error: val_or_default(value.error, default.error),
176            missing: val_or_default(value.missing, default.missing),
177            line_feed: val_or_default(value.line_feed, default.line_feed),
178            backtick: val_or_default(value.backtick, default.backtick),
179            literal: val_or_default(value.literal, default.literal),
180        }
181    }
182}
183
/// Selects how `parse_file_at_path` prints a successfully produced tree.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum ParseOutput {
    /// Indented S-expression of the named nodes, optionally with ranges.
    Normal,
    /// No tree is printed; error/timing lines may still be written.
    Quiet,
    /// XML document with one `<source>` element per parsed file.
    Xml,
    /// Concrete syntax tree rendered by `render_cst`.
    Cst,
    /// Tree graph produced through `util::print_tree_graph`.
    Dot,
}
192
/// A position in a multi-line text document, in terms of rows and columns.
///
/// Rows and columns are zero-based.
///
/// This serves as a serializable wrapper for `Point`
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub struct ParsePoint {
    /// Row index, copied from `Point::row`.
    pub row: usize,
    /// Column index, copied from `Point::column`.
    pub column: usize,
}
203
204impl From<Point> for ParsePoint {
205    fn from(value: Point) -> Self {
206        Self {
207            row: value.row,
208            column: value.column,
209        }
210    }
211}
212
/// Per-file record of a parse attempt, pushed onto
/// `ParseStats::parse_summaries` by `parse_file_at_path`.
#[derive(Serialize, Default, Debug, Clone)]
pub struct ParseSummary {
    /// Path of the parsed file.
    pub file: PathBuf,
    /// `true` when a tree was produced and no ERROR or MISSING node was found.
    pub successful: bool,
    /// Start position of the root node; `None` when no tree was produced.
    pub start: Option<ParsePoint>,
    /// End position of the root node; `None` when no tree was produced.
    pub end: Option<ParsePoint>,
    /// Duration of the initial parse; `None` when no tree was produced.
    pub duration: Option<Duration>,
    /// Length of the source in bytes.
    pub bytes: Option<usize>,
}
222
223impl ParseSummary {
224    #[must_use]
225    pub fn new(path: &Path) -> Self {
226        Self {
227            file: path.to_path_buf(),
228            successful: false,
229            ..Default::default()
230        }
231    }
232}
233
/// Results collected across all files handled by one parse invocation.
#[derive(Serialize, Debug)]
pub struct ParseStats {
    /// One entry per file processed so far, in processing order.
    pub parse_summaries: Vec<ParseSummary>,
    /// Aggregate counters over the processed files.
    pub cumulative_stats: Stats,
    /// Total number of sources expected; the XML output uses it to decide
    /// when the closing `</sources>` tag must be written. Defaults to 1.
    pub source_count: usize,
}
240
241impl Default for ParseStats {
242    fn default() -> Self {
243        Self {
244            parse_summaries: Vec::new(),
245            cumulative_stats: Stats::default(),
246            source_count: 1,
247        }
248    }
249}
250
// Controls the parser log written to stderr by `parse_file_at_path`.
//
// NOTE: plain `//` comments are used on purpose; with the `ValueEnum` derive,
// `///` doc comments on variants would become CLI help text.
#[derive(Serialize, ValueEnum, Debug, Copy, Clone, Default, Eq, PartialEq)]
pub enum ParseDebugType {
    // No logger is installed (default).
    #[default]
    Quiet,
    // Log messages are written as-is; lexing messages are indented.
    Normal,
    // Like `Normal`, but messages are colored per parse-stack version
    // unless `NO_COLOR=1` is set.
    Pretty,
}
258
/// Options controlling a single call to `parse_file_at_path`.
pub struct ParseFileOptions<'a> {
    /// Edit descriptions, each handed to `parse_edit_flag` and applied in
    /// order after the initial parse, with a reparse after every edit.
    pub edits: &'a [&'a str],
    /// Which representation of the tree to print.
    pub output: ParseOutput,
    /// Receives one `ParseSummary` per parsed file.
    pub stats: &'a mut ParseStats,
    /// Print the timing line even when the parse had no error.
    pub print_time: bool,
    /// Parse timeout in microseconds; `0` disables the timeout.
    pub timeout: u64,
    /// Kind of parser log written to stderr (ignored when `debug_graph` is set).
    pub debug: ParseDebugType,
    /// Record graphs to `log.html` through `util::log_graphs`.
    pub debug_graph: bool,
    /// When present, the parse is aborted as soon as this flag is non-zero.
    pub cancellation_flag: Option<&'a AtomicUsize>,
    /// Explicit input encoding (an `ffi::TSInputEncoding*` value); when
    /// `None`, UTF-16 is detected from a leading byte-order mark.
    pub encoding: Option<u32>,
    /// Forwarded to `util::log_graphs` / `util::print_tree_graph`
    /// (presumably opens the generated log; confirm against `util`).
    pub open_log: bool,
    /// Omit node ranges from the printed tree.
    pub no_ranges: bool,
    /// Colors used when rendering the CST.
    pub parse_theme: &'a ParseTheme,
}
273
/// Compact outcome of a single parse.
// NOTE(review): not referenced in the visible part of this file; field
// meanings below are inferred from the matching `ParseSummary` fields.
#[derive(Copy, Clone)]
pub struct ParseResult {
    /// Whether the parse succeeded.
    pub successful: bool,
    /// Number of source bytes.
    pub bytes: usize,
    /// Parse duration, if one was measured.
    pub duration: Option<Duration>,
}
280
281pub fn parse_file_at_path(
282    parser: &mut Parser,
283    language: &Language,
284    path: &Path,
285    name: &str,
286    max_path_length: usize,
287    opts: &mut ParseFileOptions,
288) -> Result<()> {
289    let mut _log_session = None;
290    parser.set_language(language)?;
291    let mut source_code = fs::read(path).with_context(|| format!("Error reading {name:?}"))?;
292
293    // Render an HTML graph if `--debug-graph` was passed
294    if opts.debug_graph {
295        _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?);
296    }
297    // Log to stderr if `--debug` was passed
298    else if opts.debug != ParseDebugType::Quiet {
299        let mut curr_version: usize = 0;
300        let use_color = std::env::var("NO_COLOR").map_or(true, |v| v != "1");
301        let debug = opts.debug;
302        parser.set_logger(Some(Box::new(move |log_type, message| {
303            if debug == ParseDebugType::Normal {
304                if log_type == LogType::Lex {
305                    write!(&mut io::stderr(), "  ").unwrap();
306                }
307                writeln!(&mut io::stderr(), "{message}").unwrap();
308            } else {
309                let colors = &[
310                    AnsiColor::White,
311                    AnsiColor::Red,
312                    AnsiColor::Blue,
313                    AnsiColor::Green,
314                    AnsiColor::Cyan,
315                    AnsiColor::Yellow,
316                ];
317                if message.starts_with("process version:") {
318                    let comma_idx = message.find(',').unwrap();
319                    curr_version = message["process version:".len()..comma_idx]
320                        .parse()
321                        .unwrap();
322                }
323                let color = if use_color {
324                    Some(colors[curr_version])
325                } else {
326                    None
327                };
328                let mut out = if log_type == LogType::Lex {
329                    "  ".to_string()
330                } else {
331                    String::new()
332                };
333                out += &paint(color, message);
334                writeln!(&mut io::stderr(), "{out}").unwrap();
335            }
336        })));
337    }
338
339    let parse_time = Instant::now();
340
341    #[inline(always)]
342    fn is_utf16_le_bom(bom_bytes: &[u8]) -> bool {
343        bom_bytes == [0xFF, 0xFE]
344    }
345
346    #[inline(always)]
347    fn is_utf16_be_bom(bom_bytes: &[u8]) -> bool {
348        bom_bytes == [0xFE, 0xFF]
349    }
350
351    let encoding = match opts.encoding {
352        None if source_code.len() >= 2 => {
353            if is_utf16_le_bom(&source_code[0..2]) {
354                Some(ffi::TSInputEncodingUTF16LE)
355            } else if is_utf16_be_bom(&source_code[0..2]) {
356                Some(ffi::TSInputEncodingUTF16BE)
357            } else {
358                None
359            }
360        }
361        _ => opts.encoding,
362    };
363
364    // If the `--cancel` flag was passed, then cancel the parse
365    // when the user types a newline.
366    //
367    // Additionally, if the `--time` flag was passed, end the parse
368    // after the specified number of microseconds.
369    let start_time = Instant::now();
370    let progress_callback = &mut |_: &ParseState| {
371        if let Some(cancellation_flag) = opts.cancellation_flag {
372            if cancellation_flag.load(Ordering::SeqCst) != 0 {
373                return ControlFlow::Break(());
374            }
375        }
376
377        if opts.timeout > 0 && start_time.elapsed().as_micros() > opts.timeout as u128 {
378            return ControlFlow::Break(());
379        }
380
381        ControlFlow::Continue(())
382    };
383
384    let parse_opts = ParseOptions::new().progress_callback(progress_callback);
385
386    let tree = match encoding {
387        Some(encoding) if encoding == ffi::TSInputEncodingUTF16LE => {
388            let source_code_utf16 = source_code
389                .chunks_exact(2)
390                .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
391                .collect::<Vec<_>>();
392            parser.parse_utf16_le_with_options(
393                &mut |i, _| {
394                    if i < source_code_utf16.len() {
395                        &source_code_utf16[i..]
396                    } else {
397                        &[]
398                    }
399                },
400                None,
401                Some(parse_opts),
402            )
403        }
404        Some(encoding) if encoding == ffi::TSInputEncodingUTF16BE => {
405            let source_code_utf16 = source_code
406                .chunks_exact(2)
407                .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]]))
408                .collect::<Vec<_>>();
409            parser.parse_utf16_be_with_options(
410                &mut |i, _| {
411                    if i < source_code_utf16.len() {
412                        &source_code_utf16[i..]
413                    } else {
414                        &[]
415                    }
416                },
417                None,
418                Some(parse_opts),
419            )
420        }
421        _ => parser.parse_with_options(
422            &mut |i, _| {
423                if i < source_code.len() {
424                    &source_code[i..]
425                } else {
426                    &[]
427                }
428            },
429            None,
430            Some(parse_opts),
431        ),
432    };
433    let parse_duration = parse_time.elapsed();
434
435    let stdout = io::stdout();
436    let mut stdout = stdout.lock();
437
438    if let Some(mut tree) = tree {
439        if opts.debug_graph && !opts.edits.is_empty() {
440            info!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
441        }
442
443        let edit_time = Instant::now();
444        for (i, edit) in opts.edits.iter().enumerate() {
445            let edit = parse_edit_flag(&source_code, edit)?;
446            perform_edit(&mut tree, &mut source_code, &edit)?;
447            tree = parser.parse(&source_code, Some(&tree)).unwrap();
448
449            if opts.debug_graph {
450                info!("AFTER {i}:\n{}", String::from_utf8_lossy(&source_code));
451            }
452        }
453        let edit_duration = edit_time.elapsed();
454
455        parser.stop_printing_dot_graphs();
456
457        let parse_duration_ms = parse_duration.as_micros() as f64 / 1e3;
458        let edit_duration_ms = edit_duration.as_micros() as f64 / 1e3;
459        let mut cursor = tree.walk();
460
461        if opts.output == ParseOutput::Normal {
462            let mut needs_newline = false;
463            let mut indent_level = 0;
464            let mut did_visit_children = false;
465            loop {
466                let node = cursor.node();
467                let is_named = node.is_named();
468                if did_visit_children {
469                    if is_named {
470                        stdout.write_all(b")")?;
471                        needs_newline = true;
472                    }
473                    if cursor.goto_next_sibling() {
474                        did_visit_children = false;
475                    } else if cursor.goto_parent() {
476                        did_visit_children = true;
477                        indent_level -= 1;
478                    } else {
479                        break;
480                    }
481                } else {
482                    if is_named {
483                        if needs_newline {
484                            stdout.write_all(b"\n")?;
485                        }
486                        for _ in 0..indent_level {
487                            stdout.write_all(b"  ")?;
488                        }
489                        let start = node.start_position();
490                        let end = node.end_position();
491                        if let Some(field_name) = cursor.field_name() {
492                            write!(&mut stdout, "{field_name}: ")?;
493                        }
494                        write!(&mut stdout, "({}", node.kind())?;
495                        if !opts.no_ranges {
496                            write!(
497                                &mut stdout,
498                                " [{}, {}] - [{}, {}]",
499                                start.row, start.column, end.row, end.column
500                            )?;
501                        }
502                        needs_newline = true;
503                    }
504                    if cursor.goto_first_child() {
505                        did_visit_children = false;
506                        indent_level += 1;
507                    } else {
508                        did_visit_children = true;
509                    }
510                }
511            }
512            cursor.reset(tree.root_node());
513            println!();
514        }
515
516        if opts.output == ParseOutput::Cst {
517            render_cst(&source_code, &tree, &mut cursor, opts, &mut stdout)?;
518            println!();
519        }
520
521        if opts.output == ParseOutput::Xml {
522            let mut needs_newline = false;
523            let mut indent_level = 2;
524            let mut did_visit_children = false;
525            let mut had_named_children = false;
526            let mut tags = Vec::<&str>::new();
527
528            // If we're parsing the first file, write the header
529            if opts.stats.parse_summaries.is_empty() {
530                writeln!(&mut stdout, "<?xml version=\"1.0\"?>")?;
531                writeln!(&mut stdout, "<sources>")?;
532            }
533            writeln!(&mut stdout, "  <source name=\"{}\">", path.display())?;
534
535            loop {
536                let node = cursor.node();
537                let is_named = node.is_named();
538                if did_visit_children {
539                    if is_named {
540                        let tag = tags.pop();
541                        if had_named_children {
542                            for _ in 0..indent_level {
543                                stdout.write_all(b"  ")?;
544                            }
545                        }
546                        write!(&mut stdout, "</{}>", tag.expect("there is a tag"))?;
547                        // we only write a line in the case where it's the last sibling
548                        if let Some(parent) = node.parent() {
549                            if parent.child(parent.child_count() as u32 - 1).unwrap() == node {
550                                stdout.write_all(b"\n")?;
551                            }
552                        }
553                        needs_newline = true;
554                    }
555                    if cursor.goto_next_sibling() {
556                        did_visit_children = false;
557                        had_named_children = false;
558                    } else if cursor.goto_parent() {
559                        did_visit_children = true;
560                        had_named_children = is_named;
561                        indent_level -= 1;
562                        if !is_named && needs_newline {
563                            stdout.write_all(b"\n")?;
564                            for _ in 0..indent_level {
565                                stdout.write_all(b"  ")?;
566                            }
567                        }
568                    } else {
569                        break;
570                    }
571                } else {
572                    if is_named {
573                        if needs_newline {
574                            stdout.write_all(b"\n")?;
575                        }
576                        for _ in 0..indent_level {
577                            stdout.write_all(b"  ")?;
578                        }
579                        write!(&mut stdout, "<{}", node.kind())?;
580                        if let Some(field_name) = cursor.field_name() {
581                            write!(&mut stdout, " field=\"{field_name}\"")?;
582                        }
583                        let start = node.start_position();
584                        let end = node.end_position();
585                        write!(&mut stdout, " srow=\"{}\"", start.row)?;
586                        write!(&mut stdout, " scol=\"{}\"", start.column)?;
587                        write!(&mut stdout, " erow=\"{}\"", end.row)?;
588                        write!(&mut stdout, " ecol=\"{}\"", end.column)?;
589                        write!(&mut stdout, ">")?;
590                        tags.push(node.kind());
591                        needs_newline = true;
592                    }
593                    if cursor.goto_first_child() {
594                        did_visit_children = false;
595                        had_named_children = false;
596                        indent_level += 1;
597                    } else {
598                        did_visit_children = true;
599                        let start = node.start_byte();
600                        let end = node.end_byte();
601                        let value =
602                            std::str::from_utf8(&source_code[start..end]).expect("has a string");
603                        if !is_named && needs_newline {
604                            stdout.write_all(b"\n")?;
605                            for _ in 0..indent_level {
606                                stdout.write_all(b"  ")?;
607                            }
608                        }
609                        write!(&mut stdout, "{}", html_escape::encode_text(value))?;
610                    }
611                }
612            }
613            writeln!(&mut stdout)?;
614            writeln!(&mut stdout, "  </source>")?;
615
616            // If we parsed the last file, write the closing tag for the `sources` header
617            if opts.stats.parse_summaries.len() == opts.stats.source_count - 1 {
618                writeln!(&mut stdout, "</sources>")?;
619            }
620            cursor.reset(tree.root_node());
621        }
622
623        if opts.output == ParseOutput::Dot {
624            util::print_tree_graph(&tree, "log.html", opts.open_log).unwrap();
625        }
626
627        let mut first_error = None;
628        let mut earliest_node_with_error = None;
629        'outer: loop {
630            let node = cursor.node();
631            if node.has_error() {
632                if earliest_node_with_error.is_none() {
633                    earliest_node_with_error = Some(node);
634                }
635                if node.is_error() || node.is_missing() {
636                    first_error = Some(node);
637                    break;
638                }
639
640                // If there's no more children, even though some outer node has an error,
641                // then that means that the first error is hidden, but the later error could be
642                // visible. So, we walk back up to the child of the first node with an error,
643                // and then check its siblings for errors.
644                if !cursor.goto_first_child() {
645                    let earliest = earliest_node_with_error.unwrap();
646                    while cursor.goto_parent() {
647                        if cursor.node().parent().is_some_and(|p| p == earliest) {
648                            while cursor.goto_next_sibling() {
649                                let sibling = cursor.node();
650                                if sibling.is_error() || sibling.is_missing() {
651                                    first_error = Some(sibling);
652                                    break 'outer;
653                                }
654                                if sibling.has_error() && cursor.goto_first_child() {
655                                    continue 'outer;
656                                }
657                            }
658                            break;
659                        }
660                    }
661                    break;
662                }
663            } else if !cursor.goto_next_sibling() {
664                break;
665            }
666        }
667
668        if first_error.is_some() || opts.print_time {
669            let path = path.to_string_lossy();
670            write!(
671                &mut stdout,
672                "{:width$}\tParse: {parse_duration_ms:>7.2} ms\t{:>6} bytes/ms",
673                name,
674                (source_code.len() as u128 * 1_000_000) / parse_duration.as_nanos(),
675                width = max_path_length
676            )?;
677            if let Some(node) = first_error {
678                let node_kind = node.kind();
679                let mut node_text = String::with_capacity(node_kind.len());
680                for c in node_kind.chars() {
681                    if let Some(escaped) = escape_invisible(c) {
682                        node_text += escaped;
683                    } else {
684                        node_text.push(c);
685                    }
686                }
687                write!(&mut stdout, "\t(")?;
688                if node.is_missing() {
689                    if node.is_named() {
690                        write!(&mut stdout, "MISSING {node_text}")?;
691                    } else {
692                        write!(&mut stdout, "MISSING \"{node_text}\"")?;
693                    }
694                } else {
695                    write!(&mut stdout, "{node_text}")?;
696                }
697
698                let start = node.start_position();
699                let end = node.end_position();
700                write!(
701                    &mut stdout,
702                    " [{}, {}] - [{}, {}])",
703                    start.row, start.column, end.row, end.column
704                )?;
705            }
706            if !opts.edits.is_empty() {
707                write!(
708                    &mut stdout,
709                    "\n{:width$}\tEdit:  {edit_duration_ms:>7.2} ms",
710                    " ".repeat(path.len()),
711                    width = max_path_length,
712                )?;
713            }
714            writeln!(&mut stdout)?;
715        }
716
717        opts.stats.parse_summaries.push(ParseSummary {
718            file: path.to_path_buf(),
719            successful: first_error.is_none(),
720            start: Some(tree.root_node().start_position().into()),
721            end: Some(tree.root_node().end_position().into()),
722            duration: Some(parse_duration),
723            bytes: Some(source_code.len()),
724        });
725
726        return Ok(());
727    }
728    parser.stop_printing_dot_graphs();
729
730    if opts.print_time {
731        let duration = parse_time.elapsed();
732        let duration_ms = duration.as_micros() as f64 / 1e3;
733        writeln!(
734            &mut stdout,
735            "{:width$}\tParse: {duration_ms:>7.2} ms\t(timed out)",
736            path.to_str().unwrap(),
737            width = max_path_length
738        )?;
739    }
740
741    opts.stats.parse_summaries.push(ParseSummary {
742        file: path.to_path_buf(),
743        successful: false,
744        start: None,
745        end: None,
746        duration: None,
747        bytes: Some(source_code.len()),
748    });
749
750    Ok(())
751}
752
/// Returns the backslash escape sequence for a control character or a
/// backslash, or `None` when `c` can be printed as-is.
///
/// Handled characters: newline, carriage return, tab, NUL, backslash,
/// vertical tab (`\x0b`) and form feed (`\x0c`).
const fn escape_invisible(c: char) -> Option<&'static str> {
    match c {
        '\n' => Some("\\n"),
        '\r' => Some("\\r"),
        '\t' => Some("\\t"),
        '\0' => Some("\\0"),
        '\\' => Some("\\\\"),
        '\x0b' => Some("\\v"),
        '\x0c' => Some("\\f"),
        _ => None,
    }
}
765
/// Returns the backslash-escaped form of a quoting character (backtick or
/// double quote), or `None` for any other character.
const fn escape_delimiter(c: char) -> Option<&'static str> {
    match c {
        '`' => Some("\\`"),
        '\"' => Some("\\\""),
        _ => None,
    }
}
773
774pub fn render_cst<'a, 'b: 'a>(
775    source_code: &[u8],
776    tree: &'b Tree,
777    cursor: &mut TreeCursor<'a>,
778    opts: &ParseFileOptions,
779    out: &mut impl Write,
780) -> Result<()> {
781    let lossy_source_code = String::from_utf8_lossy(source_code);
782    let total_width = lossy_source_code
783        .lines()
784        .enumerate()
785        .map(|(row, col)| (row as f64).log10() as usize + (col.len() as f64).log10() as usize + 1)
786        .max()
787        .unwrap_or(1);
788    let mut indent_level = 1;
789    let mut did_visit_children = false;
790    let mut in_error = false;
791    loop {
792        if did_visit_children {
793            if cursor.goto_next_sibling() {
794                did_visit_children = false;
795            } else if cursor.goto_parent() {
796                did_visit_children = true;
797                indent_level -= 1;
798                if !cursor.node().has_error() {
799                    in_error = false;
800                }
801            } else {
802                break;
803            }
804        } else {
805            cst_render_node(
806                opts,
807                cursor,
808                source_code,
809                out,
810                total_width,
811                indent_level,
812                in_error,
813            )?;
814            if cursor.goto_first_child() {
815                did_visit_children = false;
816                indent_level += 1;
817                if cursor.node().has_error() {
818                    in_error = true;
819                }
820            } else {
821                did_visit_children = true;
822            }
823        }
824    }
825    cursor.reset(tree.root_node());
826    Ok(())
827}
828
829fn render_node_text(source: &str) -> String {
830    source
831        .chars()
832        .fold(String::with_capacity(source.len()), |mut acc, c| {
833            if let Some(esc) = escape_invisible(c) {
834                acc.push_str(esc);
835            } else if let Some(esc) = escape_delimiter(c) {
836                acc.push_str(esc);
837            } else {
838                acc.push(c);
839            }
840            acc
841        })
842}
843
844fn write_node_text(
845    opts: &ParseFileOptions,
846    out: &mut impl Write,
847    cursor: &TreeCursor,
848    is_named: bool,
849    source: &str,
850    color: Option<impl Into<Color> + Copy>,
851    text_info: (usize, usize),
852) -> Result<()> {
853    let (total_width, indent_level) = text_info;
854    let (quote, quote_color) = if is_named {
855        ('`', opts.parse_theme.backtick)
856    } else {
857        ('\"', color.map(|c| c.into()))
858    };
859
860    if !is_named {
861        write!(
862            out,
863            "{}{}{}",
864            paint(quote_color, &String::from(quote)),
865            paint(color, &render_node_text(source)),
866            paint(quote_color, &String::from(quote)),
867        )?;
868    } else {
869        let multiline = source.contains('\n');
870        for (i, line) in source.split_inclusive('\n').enumerate() {
871            if line.is_empty() {
872                break;
873            }
874            let mut node_range = cursor.node().range();
875            // For each line of text, adjust the row by shifting it down `i` rows,
876            // and adjust the column by setting it to the length of *this* line.
877            node_range.start_point.row += i;
878            node_range.end_point.row = node_range.start_point.row;
879            node_range.end_point.column = line.len()
880                + if i == 0 {
881                    node_range.start_point.column
882                } else {
883                    0
884                };
885            let formatted_line = render_line_feed(line, opts);
886            if !opts.no_ranges {
887                write!(
888                    out,
889                    "{}{}{}{}{}{}",
890                    if multiline { "\n" } else { "" },
891                    if multiline {
892                        render_node_range(opts, cursor, is_named, true, total_width, node_range)
893                    } else {
894                        String::new()
895                    },
896                    if multiline {
897                        "  ".repeat(indent_level + 1)
898                    } else {
899                        String::new()
900                    },
901                    paint(quote_color, &String::from(quote)),
902                    &paint(color, &render_node_text(&formatted_line)),
903                    paint(quote_color, &String::from(quote)),
904                )?;
905            } else {
906                write!(
907                    out,
908                    "\n{}{}{}{}",
909                    "  ".repeat(indent_level + 1),
910                    paint(quote_color, &String::from(quote)),
911                    &paint(color, &render_node_text(&formatted_line)),
912                    paint(quote_color, &String::from(quote)),
913                )?;
914            }
915        }
916    }
917
918    Ok(())
919}
920
921fn render_line_feed(source: &str, opts: &ParseFileOptions) -> String {
922    if cfg!(windows) {
923        source.replace("\r\n", &paint(opts.parse_theme.line_feed, "\r\n"))
924    } else {
925        source.replace('\n', &paint(opts.parse_theme.line_feed, "\n"))
926    }
927}
928
929fn render_node_range(
930    opts: &ParseFileOptions,
931    cursor: &TreeCursor,
932    is_named: bool,
933    is_multiline: bool,
934    total_width: usize,
935    range: Range,
936) -> String {
937    let has_field_name = cursor.field_name().is_some();
938    let range_color = if is_named && !is_multiline && !has_field_name {
939        opts.parse_theme.row_color_named
940    } else {
941        opts.parse_theme.row_color
942    };
943
944    let remaining_width_start = (total_width
945        - (range.start_point.row as f64).log10() as usize
946        - (range.start_point.column as f64).log10() as usize)
947        .max(1);
948    let remaining_width_end = (total_width
949        - (range.end_point.row as f64).log10() as usize
950        - (range.end_point.column as f64).log10() as usize)
951        .max(1);
952    paint(
953        range_color,
954        &format!(
955            "{}:{}{:remaining_width_start$}- {}:{}{:remaining_width_end$}",
956            range.start_point.row,
957            range.start_point.column,
958            ' ',
959            range.end_point.row,
960            range.end_point.column,
961            ' ',
962        ),
963    )
964}
965
966fn cst_render_node(
967    opts: &ParseFileOptions,
968    cursor: &mut TreeCursor,
969    source_code: &[u8],
970    out: &mut impl Write,
971    total_width: usize,
972    indent_level: usize,
973    in_error: bool,
974) -> Result<()> {
975    let node = cursor.node();
976    let is_named = node.is_named();
977    if !opts.no_ranges {
978        write!(
979            out,
980            "{}",
981            render_node_range(opts, cursor, is_named, false, total_width, node.range())
982        )?;
983    }
984    write!(
985        out,
986        "{}{}",
987        "  ".repeat(indent_level),
988        if in_error && !node.has_error() {
989            " "
990        } else {
991            ""
992        }
993    )?;
994    if is_named {
995        if let Some(field_name) = cursor.field_name() {
996            write!(
997                out,
998                "{}",
999                paint(opts.parse_theme.field, &format!("{field_name}: "))
1000            )?;
1001        }
1002
1003        if node.has_error() || node.is_error() {
1004            write!(out, "{}", paint(opts.parse_theme.error, "•"))?;
1005        }
1006
1007        let kind_color = if node.is_error() {
1008            opts.parse_theme.error
1009        } else if node.is_extra() || node.parent().is_some_and(|p| p.is_extra() && !p.is_error()) {
1010            opts.parse_theme.extra
1011        } else {
1012            opts.parse_theme.node_kind
1013        };
1014        write!(out, "{}", paint(kind_color, node.kind()),)?;
1015
1016        if node.child_count() == 0 {
1017            write!(out, " ")?;
1018            // Node text from a pattern or external scanner
1019            write_node_text(
1020                opts,
1021                out,
1022                cursor,
1023                is_named,
1024                &String::from_utf8_lossy(&source_code[node.start_byte()..node.end_byte()]),
1025                opts.parse_theme.node_text,
1026                (total_width, indent_level),
1027            )?;
1028        }
1029    } else if node.is_missing() {
1030        write!(out, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?;
1031        write!(out, "\"{}\"", paint(opts.parse_theme.missing, node.kind()))?;
1032    } else {
1033        // Terminal literals, like "fn"
1034        write_node_text(
1035            opts,
1036            out,
1037            cursor,
1038            is_named,
1039            node.kind(),
1040            opts.parse_theme.literal,
1041            (total_width, indent_level),
1042        )?;
1043    }
1044    writeln!(out)?;
1045
1046    Ok(())
1047}
1048
1049pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
1050    let start_byte = edit.position;
1051    let old_end_byte = edit.position + edit.deleted_length;
1052    let new_end_byte = edit.position + edit.inserted_text.len();
1053    let start_position = position_for_offset(input, start_byte)?;
1054    let old_end_position = position_for_offset(input, old_end_byte)?;
1055    input.splice(start_byte..old_end_byte, edit.inserted_text.iter().copied());
1056    let new_end_position = position_for_offset(input, new_end_byte)?;
1057    let edit = InputEdit {
1058        start_byte,
1059        old_end_byte,
1060        new_end_byte,
1061        start_position,
1062        old_end_position,
1063        new_end_position,
1064    };
1065    tree.edit(&edit);
1066    Ok(edit)
1067}
1068
1069fn parse_edit_flag(source_code: &[u8], flag: &str) -> Result<Edit> {
1070    let error = || {
1071        anyhow!(concat!(
1072            "Invalid edit string '{}'. ",
1073            "Edit strings must match the pattern '<START_BYTE_OR_POSITION> <REMOVED_LENGTH> <NEW_TEXT>'"
1074        ), flag)
1075    };
1076
1077    // Three whitespace-separated parts:
1078    // * edit position
1079    // * deleted length
1080    // * inserted text
1081    let mut parts = flag.split(' ');
1082    let position = parts.next().ok_or_else(error)?;
1083    let deleted_length = parts.next().ok_or_else(error)?;
1084    let inserted_text = parts.collect::<Vec<_>>().join(" ").into_bytes();
1085
1086    // Position can either be a byte_offset or row,column pair, separated by a comma
1087    let position = if position == "$" {
1088        source_code.len()
1089    } else if position.contains(',') {
1090        let mut parts = position.split(',');
1091        let row = parts.next().ok_or_else(error)?;
1092        let row = row.parse::<usize>().map_err(|_| error())?;
1093        let column = parts.next().ok_or_else(error)?;
1094        let column = column.parse::<usize>().map_err(|_| error())?;
1095        offset_for_position(source_code, Point { row, column })?
1096    } else {
1097        position.parse::<usize>().map_err(|_| error())?
1098    };
1099
1100    // Deleted length must be a byte count.
1101    let deleted_length = deleted_length.parse::<usize>().map_err(|_| error())?;
1102
1103    Ok(Edit {
1104        position,
1105        deleted_length,
1106        inserted_text,
1107    })
1108}
1109
1110pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
1111    let mut row = 0;
1112    let mut offset = 0;
1113    let mut iter = memchr::memchr_iter(b'\n', input);
1114    loop {
1115        if let Some(pos) = iter.next() {
1116            if row < position.row {
1117                row += 1;
1118                offset = pos;
1119                continue;
1120            }
1121        }
1122        offset += 1;
1123        break;
1124    }
1125    if position.row - row > 0 {
1126        return Err(anyhow!("Failed to address a row: {}", position.row));
1127    }
1128    if let Some(pos) = iter.next() {
1129        if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
1130            return Err(anyhow!("Failed to address a column: {}", position.column));
1131        }
1132    } else if input.len() - offset < position.column {
1133        return Err(anyhow!("Failed to address a column over the end"));
1134    }
1135    Ok(offset + position.column)
1136}
1137
1138pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
1139    if offset > input.len() {
1140        return Err(anyhow!("Failed to address an offset: {offset}"));
1141    }
1142    let mut result = Point { row: 0, column: 0 };
1143    let mut last = 0;
1144    for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
1145        result.row += 1;
1146        last = pos;
1147    }
1148    result.column = if result.row > 0 {
1149        offset - last - 1
1150    } else {
1151        offset
1152    };
1153    Ok(result)
1154}