tree_sitter_cli/
parse.rs

1use std::{
2    fmt, fs,
3    io::{self, StdoutLock, Write},
4    path::{Path, PathBuf},
5    sync::atomic::{AtomicUsize, Ordering},
6    time::{Duration, Instant},
7};
8
9use anstyle::{AnsiColor, Color, RgbColor};
10use anyhow::{anyhow, Context, Result};
11use clap::ValueEnum;
12use serde::{Deserialize, Serialize};
13use tree_sitter::{
14    ffi, InputEdit, Language, LogType, ParseOptions, ParseState, Parser, Point, Range, Tree,
15    TreeCursor,
16};
17
18use super::util;
19use crate::{fuzz::edits::Edit, test::paint};
20
/// Aggregate counters for a batch of parses.
///
/// The `Display` implementation renders these as a single summary line.
#[derive(Debug, Default, Serialize)]
pub struct Stats {
    /// Number of parses counted as successful.
    pub successful_parses: usize,
    /// Total number of parses counted.
    pub total_parses: usize,
    /// Total number of bytes parsed; the numerator of the reported average speed.
    pub total_bytes: usize,
    /// Total time spent parsing; the denominator of the reported average speed.
    pub total_duration: Duration,
}
28
29impl fmt::Display for Stats {
30    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31        let duration_us = self.total_duration.as_micros();
32        let success_rate = if self.total_parses > 0 {
33            format!(
34                "{:.2}%",
35                ((self.successful_parses as f64) / (self.total_parses as f64)) * 100.0,
36            )
37        } else {
38            "N/A".to_string()
39        };
40        let duration_str = match (self.total_parses, duration_us) {
41            (0, _) => "N/A".to_string(),
42            (_, 0) => "0 bytes/ms".to_string(),
43            (_, _) => format!(
44                "{} bytes/ms",
45                ((self.total_bytes as u128) * 1_000) / duration_us
46            ),
47        };
48        writeln!(
49            f,
50            "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {success_rate}; average speed: {duration_str}",
51            self.total_parses,
52            self.successful_parses,
53            self.total_parses - self.successful_parses,
54        )
55    }
56}
57
/// Sets the color used in the output of `tree-sitter parse --cst`
///
/// Every entry is optional. `ParseTheme::empty()` sets all of them to `None`, while
/// `ParseTheme::default()` fills in the built-in palette.
// NOTE(review): a `None` entry presumably leaves that element unstyled — confirm against `paint`.
#[derive(Debug, Copy, Clone)]
pub struct ParseTheme {
    /// The color of node kinds
    pub node_kind: Option<Color>,
    /// The color of text associated with a node
    pub node_text: Option<Color>,
    /// The color of node fields
    pub field: Option<Color>,
    /// The color of the range information for unnamed nodes
    pub row_color: Option<Color>,
    /// The color of the range information for named nodes
    pub row_color_named: Option<Color>,
    /// The color of extra nodes
    pub extra: Option<Color>,
    /// The color of ERROR nodes
    pub error: Option<Color>,
    /// The color of MISSING nodes and their associated text
    pub missing: Option<Color>,
    /// The color of newline characters
    pub line_feed: Option<Color>,
    /// The color of backticks
    pub backtick: Option<Color>,
    /// The color of literals
    pub literal: Option<Color>,
}
84
85impl ParseTheme {
86    const GRAY: Color = Color::Rgb(RgbColor(118, 118, 118));
87    const LIGHT_GRAY: Color = Color::Rgb(RgbColor(166, 172, 181));
88    const ORANGE: Color = Color::Rgb(RgbColor(255, 153, 51));
89    const YELLOW: Color = Color::Rgb(RgbColor(219, 219, 173));
90    const GREEN: Color = Color::Rgb(RgbColor(101, 192, 67));
91
92    #[must_use]
93    pub const fn empty() -> Self {
94        Self {
95            node_kind: None,
96            node_text: None,
97            field: None,
98            row_color: None,
99            row_color_named: None,
100            extra: None,
101            error: None,
102            missing: None,
103            line_feed: None,
104            backtick: None,
105            literal: None,
106        }
107    }
108}
109
110impl Default for ParseTheme {
111    fn default() -> Self {
112        Self {
113            node_kind: Some(AnsiColor::BrightCyan.into()),
114            node_text: Some(Self::GRAY),
115            field: Some(AnsiColor::Blue.into()),
116            row_color: Some(AnsiColor::White.into()),
117            row_color_named: Some(AnsiColor::BrightCyan.into()),
118            extra: Some(AnsiColor::BrightMagenta.into()),
119            error: Some(AnsiColor::Red.into()),
120            missing: Some(Self::ORANGE),
121            line_feed: Some(Self::LIGHT_GRAY),
122            backtick: Some(Self::GREEN),
123            literal: Some(Self::YELLOW),
124        }
125    }
126}
127
128#[derive(Debug, Copy, Clone, Deserialize, Serialize)]
129pub struct Rgb(pub u8, pub u8, pub u8);
130
131impl From<Rgb> for RgbColor {
132    fn from(val: Rgb) -> Self {
133        Self(val.0, val.1, val.2)
134    }
135}
136
/// Serializable configuration for the parse command; field names use kebab-case when
/// (de)serialized.
#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct Config {
    /// Optional color overrides for the CST output (serialized as `parse-theme`).
    pub parse_theme: Option<ParseThemeRaw>,
}
142
/// The serialized form of a `ParseTheme`, with each color given as an optional RGB triple.
///
/// Field names use kebab-case when (de)serialized. When converted into a `ParseTheme`,
/// any entry left as `None` falls back to the corresponding default theme color.
#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct ParseThemeRaw {
    /// Override for `ParseTheme::node_kind`
    pub node_kind: Option<Rgb>,
    /// Override for `ParseTheme::node_text`
    pub node_text: Option<Rgb>,
    /// Override for `ParseTheme::field`
    pub field: Option<Rgb>,
    /// Override for `ParseTheme::row_color`
    pub row_color: Option<Rgb>,
    /// Override for `ParseTheme::row_color_named`
    pub row_color_named: Option<Rgb>,
    /// Override for `ParseTheme::extra`
    pub extra: Option<Rgb>,
    /// Override for `ParseTheme::error`
    pub error: Option<Rgb>,
    /// Override for `ParseTheme::missing`
    pub missing: Option<Rgb>,
    /// Override for `ParseTheme::line_feed`
    pub line_feed: Option<Rgb>,
    /// Override for `ParseTheme::backtick`
    pub backtick: Option<Rgb>,
    /// Override for `ParseTheme::literal`
    pub literal: Option<Rgb>,
}
158
159impl From<ParseThemeRaw> for ParseTheme {
160    fn from(value: ParseThemeRaw) -> Self {
161        let val_or_default = |val: Option<Rgb>, default: Option<Color>| -> Option<Color> {
162            val.map_or(default, |v| Some(Color::Rgb(v.into())))
163        };
164        let default = Self::default();
165
166        Self {
167            node_kind: val_or_default(value.node_kind, default.node_kind),
168            node_text: val_or_default(value.node_text, default.node_text),
169            field: val_or_default(value.field, default.field),
170            row_color: val_or_default(value.row_color, default.row_color),
171            row_color_named: val_or_default(value.row_color_named, default.row_color_named),
172            extra: val_or_default(value.extra, default.extra),
173            error: val_or_default(value.error, default.error),
174            missing: val_or_default(value.missing, default.missing),
175            line_feed: val_or_default(value.line_feed, default.line_feed),
176            backtick: val_or_default(value.backtick, default.backtick),
177            literal: val_or_default(value.literal, default.literal),
178        }
179    }
180}
181
/// Selects which rendering of the parsed tree `parse_file_at_path` prints.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum ParseOutput {
    /// Indented S-expression of the named nodes, optionally with ranges.
    Normal,
    /// No tree rendering.
    Quiet,
    /// XML document with one element per named node.
    Xml,
    /// Concrete syntax tree rendering, colored with the configured `ParseTheme`.
    Cst,
    /// Tree graph emitted through `util::print_tree_graph`.
    Dot,
}
190
191/// A position in a multi-line text document, in terms of rows and columns.
192///
193/// Rows and columns are zero-based.
194///
195/// This serves as a serializable wrapper for `Point`
196#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
197pub struct ParsePoint {
198    pub row: usize,
199    pub column: usize,
200}
201
202impl From<Point> for ParsePoint {
203    fn from(value: Point) -> Self {
204        Self {
205            row: value.row,
206            column: value.column,
207        }
208    }
209}
210
211#[derive(Serialize, Default, Debug, Clone)]
212pub struct ParseSummary {
213    pub file: PathBuf,
214    pub successful: bool,
215    pub start: Option<ParsePoint>,
216    pub end: Option<ParsePoint>,
217    pub duration: Option<Duration>,
218    pub bytes: Option<usize>,
219}
220
221impl ParseSummary {
222    #[must_use]
223    pub fn new(path: &Path) -> Self {
224        Self {
225            file: path.to_path_buf(),
226            successful: false,
227            ..Default::default()
228        }
229    }
230}
231
/// Per-file summaries together with the aggregate statistics of a parse run.
#[derive(Serialize, Debug, Default)]
pub struct ParseStats {
    /// One entry per parsed file; `parse_file_at_path` appends to this list.
    pub parse_summaries: Vec<ParseSummary>,
    /// Totals across all parses.
    pub cumulative_stats: Stats,
}
237
/// How parser log messages are reported on stderr by `parse_file_at_path`.
#[derive(Serialize, ValueEnum, Debug, Copy, Clone, Default, Eq, PartialEq)]
pub enum ParseDebugType {
    /// No logger is installed.
    #[default]
    Quiet,
    /// Messages are written as-is, with lexer messages indented by two spaces.
    Normal,
    /// Like `Normal`, but each message is colored according to the parse-stack version
    /// currently being processed (unless `NO_COLOR=1` is set).
    Pretty,
}
245
/// Options controlling a single call to `parse_file_at_path`.
pub struct ParseFileOptions<'a> {
    /// Edit specifications applied in order after the initial parse; each is decoded by
    /// `parse_edit_flag` and followed by a re-parse.
    pub edits: &'a [&'a str],
    /// Which rendering of the tree to print.
    pub output: ParseOutput,
    /// Receives one `ParseSummary` per parsed file.
    pub stats: &'a mut ParseStats,
    /// Print the timing line even when the parse produced no error.
    pub print_time: bool,
    /// Parse timeout in microseconds; `0` disables the timeout.
    pub timeout: u64,
    /// Verbosity of parser logging to stderr (not used when `debug_graph` is set).
    pub debug: ParseDebugType,
    /// Log parse graphs through `util::log_graphs` (to `log.html`).
    pub debug_graph: bool,
    /// When set, the parse is cancelled as soon as this flag becomes non-zero.
    pub cancellation_flag: Option<&'a AtomicUsize>,
    /// Input encoding as an `ffi::TSInputEncoding*` value. When `None`, a UTF-16 byte
    /// order mark at the start of the file selects UTF-16 LE/BE.
    pub encoding: Option<u32>,
    /// Forwarded to `util::log_graphs` and `util::print_tree_graph`.
    // NOTE(review): presumably controls whether the log file is opened — confirm in `util`.
    pub open_log: bool,
    /// Omit the `[row, col] - [row, col]` ranges from the rendered tree.
    pub no_ranges: bool,
    /// Colors used by the CST output.
    pub parse_theme: &'a ParseTheme,
}
260
/// The outcome of a single parse.
// NOTE(review): not referenced in the visible part of this file — confirm where it is used.
#[derive(Copy, Clone)]
pub struct ParseResult {
    /// Whether the parse succeeded.
    pub successful: bool,
    /// Number of bytes in the parsed source.
    pub bytes: usize,
    /// Time taken by the parse, if known.
    pub duration: Option<Duration>,
}
267
268pub fn parse_file_at_path(
269    parser: &mut Parser,
270    language: &Language,
271    path: &Path,
272    name: &str,
273    max_path_length: usize,
274    opts: &mut ParseFileOptions,
275) -> Result<()> {
276    let mut _log_session = None;
277    parser.set_language(language)?;
278    let mut source_code = fs::read(path).with_context(|| format!("Error reading {name:?}"))?;
279
280    // Render an HTML graph if `--debug-graph` was passed
281    if opts.debug_graph {
282        _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?);
283    }
284    // Log to stderr if `--debug` was passed
285    else if opts.debug != ParseDebugType::Quiet {
286        let mut curr_version: usize = 0;
287        let use_color = std::env::var("NO_COLOR").map_or(true, |v| v != "1");
288        let debug = opts.debug;
289        parser.set_logger(Some(Box::new(move |log_type, message| {
290            if debug == ParseDebugType::Normal {
291                if log_type == LogType::Lex {
292                    write!(&mut io::stderr(), "  ").unwrap();
293                }
294                writeln!(&mut io::stderr(), "{message}").unwrap();
295            } else {
296                let colors = &[
297                    AnsiColor::White,
298                    AnsiColor::Red,
299                    AnsiColor::Blue,
300                    AnsiColor::Green,
301                    AnsiColor::Cyan,
302                    AnsiColor::Yellow,
303                ];
304                if message.starts_with("process version:") {
305                    let comma_idx = message.find(',').unwrap();
306                    curr_version = message["process version:".len()..comma_idx]
307                        .parse()
308                        .unwrap();
309                }
310                let color = if use_color {
311                    Some(colors[curr_version])
312                } else {
313                    None
314                };
315                let mut out = if log_type == LogType::Lex {
316                    "  ".to_string()
317                } else {
318                    String::new()
319                };
320                out += &paint(color, message);
321                writeln!(&mut io::stderr(), "{out}").unwrap();
322            }
323        })));
324    }
325
326    let parse_time = Instant::now();
327
328    #[inline(always)]
329    fn is_utf16_le_bom(bom_bytes: &[u8]) -> bool {
330        bom_bytes == [0xFF, 0xFE]
331    }
332
333    #[inline(always)]
334    fn is_utf16_be_bom(bom_bytes: &[u8]) -> bool {
335        bom_bytes == [0xFE, 0xFF]
336    }
337
338    let encoding = match opts.encoding {
339        None if source_code.len() >= 2 => {
340            if is_utf16_le_bom(&source_code[0..2]) {
341                Some(ffi::TSInputEncodingUTF16LE)
342            } else if is_utf16_be_bom(&source_code[0..2]) {
343                Some(ffi::TSInputEncodingUTF16BE)
344            } else {
345                None
346            }
347        }
348        _ => opts.encoding,
349    };
350
351    // If the `--cancel` flag was passed, then cancel the parse
352    // when the user types a newline.
353    //
354    // Additionally, if the `--time` flag was passed, end the parse
355    // after the specified number of microseconds.
356    let start_time = Instant::now();
357    let progress_callback = &mut |_: &ParseState| {
358        if let Some(cancellation_flag) = opts.cancellation_flag {
359            if cancellation_flag.load(Ordering::SeqCst) != 0 {
360                return true;
361            }
362        }
363
364        if opts.timeout > 0 && start_time.elapsed().as_micros() > opts.timeout as u128 {
365            return true;
366        }
367
368        false
369    };
370
371    let parse_opts = ParseOptions::new().progress_callback(progress_callback);
372
373    let tree = match encoding {
374        Some(encoding) if encoding == ffi::TSInputEncodingUTF16LE => {
375            let source_code_utf16 = source_code
376                .chunks_exact(2)
377                .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
378                .collect::<Vec<_>>();
379            parser.parse_utf16_le_with_options(
380                &mut |i, _| {
381                    if i < source_code_utf16.len() {
382                        &source_code_utf16[i..]
383                    } else {
384                        &[]
385                    }
386                },
387                None,
388                Some(parse_opts),
389            )
390        }
391        Some(encoding) if encoding == ffi::TSInputEncodingUTF16BE => {
392            let source_code_utf16 = source_code
393                .chunks_exact(2)
394                .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]]))
395                .collect::<Vec<_>>();
396            parser.parse_utf16_be_with_options(
397                &mut |i, _| {
398                    if i < source_code_utf16.len() {
399                        &source_code_utf16[i..]
400                    } else {
401                        &[]
402                    }
403                },
404                None,
405                Some(parse_opts),
406            )
407        }
408        _ => parser.parse_with_options(
409            &mut |i, _| {
410                if i < source_code.len() {
411                    &source_code[i..]
412                } else {
413                    &[]
414                }
415            },
416            None,
417            Some(parse_opts),
418        ),
419    };
420    let parse_duration = parse_time.elapsed();
421
422    let stdout = io::stdout();
423    let mut stdout = stdout.lock();
424
425    if let Some(mut tree) = tree {
426        if opts.debug_graph && !opts.edits.is_empty() {
427            println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
428        }
429
430        let edit_time = Instant::now();
431        for (i, edit) in opts.edits.iter().enumerate() {
432            let edit = parse_edit_flag(&source_code, edit)?;
433            perform_edit(&mut tree, &mut source_code, &edit)?;
434            tree = parser.parse(&source_code, Some(&tree)).unwrap();
435
436            if opts.debug_graph {
437                println!("AFTER {i}:\n{}", String::from_utf8_lossy(&source_code));
438            }
439        }
440        let edit_duration = edit_time.elapsed();
441
442        parser.stop_printing_dot_graphs();
443
444        let parse_duration_ms = parse_duration.as_micros() as f64 / 1e3;
445        let edit_duration_ms = edit_duration.as_micros() as f64 / 1e3;
446        let mut cursor = tree.walk();
447
448        if opts.output == ParseOutput::Normal {
449            let mut needs_newline = false;
450            let mut indent_level = 0;
451            let mut did_visit_children = false;
452            loop {
453                let node = cursor.node();
454                let is_named = node.is_named();
455                if did_visit_children {
456                    if is_named {
457                        stdout.write_all(b")")?;
458                        needs_newline = true;
459                    }
460                    if cursor.goto_next_sibling() {
461                        did_visit_children = false;
462                    } else if cursor.goto_parent() {
463                        did_visit_children = true;
464                        indent_level -= 1;
465                    } else {
466                        break;
467                    }
468                } else {
469                    if is_named {
470                        if needs_newline {
471                            stdout.write_all(b"\n")?;
472                        }
473                        for _ in 0..indent_level {
474                            stdout.write_all(b"  ")?;
475                        }
476                        let start = node.start_position();
477                        let end = node.end_position();
478                        if let Some(field_name) = cursor.field_name() {
479                            write!(&mut stdout, "{field_name}: ")?;
480                        }
481                        write!(&mut stdout, "({}", node.kind())?;
482                        if !opts.no_ranges {
483                            write!(
484                                &mut stdout,
485                                " [{}, {}] - [{}, {}]",
486                                start.row, start.column, end.row, end.column
487                            )?;
488                        }
489                        needs_newline = true;
490                    }
491                    if cursor.goto_first_child() {
492                        did_visit_children = false;
493                        indent_level += 1;
494                    } else {
495                        did_visit_children = true;
496                    }
497                }
498            }
499            cursor.reset(tree.root_node());
500            println!();
501        }
502
503        if opts.output == ParseOutput::Cst {
504            let lossy_source_code = String::from_utf8_lossy(&source_code);
505            let total_width = lossy_source_code
506                .lines()
507                .enumerate()
508                .map(|(row, col)| {
509                    (row as f64).log10() as usize + (col.len() as f64).log10() as usize + 1
510                })
511                .max()
512                .unwrap_or(1);
513            let mut indent_level = 1;
514            let mut did_visit_children = false;
515            let mut in_error = false;
516            loop {
517                if did_visit_children {
518                    if cursor.goto_next_sibling() {
519                        did_visit_children = false;
520                    } else if cursor.goto_parent() {
521                        did_visit_children = true;
522                        indent_level -= 1;
523                        if !cursor.node().has_error() {
524                            in_error = false;
525                        }
526                    } else {
527                        break;
528                    }
529                } else {
530                    cst_render_node(
531                        opts,
532                        &mut cursor,
533                        &source_code,
534                        &mut stdout,
535                        total_width,
536                        indent_level,
537                        in_error,
538                    )?;
539                    if cursor.goto_first_child() {
540                        did_visit_children = false;
541                        indent_level += 1;
542                        if cursor.node().has_error() {
543                            in_error = true;
544                        }
545                    } else {
546                        did_visit_children = true;
547                    }
548                }
549            }
550            cursor.reset(tree.root_node());
551            println!();
552        }
553
554        if opts.output == ParseOutput::Xml {
555            let mut needs_newline = false;
556            let mut indent_level = 0;
557            let mut did_visit_children = false;
558            let mut had_named_children = false;
559            let mut tags = Vec::<&str>::new();
560            writeln!(&mut stdout, "<?xml version=\"1.0\"?>")?;
561            loop {
562                let node = cursor.node();
563                let is_named = node.is_named();
564                if did_visit_children {
565                    if is_named {
566                        let tag = tags.pop();
567                        if had_named_children {
568                            for _ in 0..indent_level {
569                                stdout.write_all(b"  ")?;
570                            }
571                        }
572                        write!(&mut stdout, "</{}>", tag.expect("there is a tag"))?;
573                        // we only write a line in the case where it's the last sibling
574                        if let Some(parent) = node.parent() {
575                            if parent.child(parent.child_count() - 1).unwrap() == node {
576                                stdout.write_all(b"\n")?;
577                            }
578                        }
579                        needs_newline = true;
580                    }
581                    if cursor.goto_next_sibling() {
582                        did_visit_children = false;
583                        had_named_children = false;
584                    } else if cursor.goto_parent() {
585                        did_visit_children = true;
586                        had_named_children = is_named;
587                        indent_level -= 1;
588                        if !is_named && needs_newline {
589                            stdout.write_all(b"\n")?;
590                            for _ in 0..indent_level {
591                                stdout.write_all(b"  ")?;
592                            }
593                        }
594                    } else {
595                        break;
596                    }
597                } else {
598                    if is_named {
599                        if needs_newline {
600                            stdout.write_all(b"\n")?;
601                        }
602                        for _ in 0..indent_level {
603                            stdout.write_all(b"  ")?;
604                        }
605                        write!(&mut stdout, "<{}", node.kind())?;
606                        if let Some(field_name) = cursor.field_name() {
607                            write!(&mut stdout, " field=\"{field_name}\"")?;
608                        }
609                        let start = node.start_position();
610                        let end = node.end_position();
611                        write!(&mut stdout, " srow=\"{}\"", start.row)?;
612                        write!(&mut stdout, " scol=\"{}\"", start.column)?;
613                        write!(&mut stdout, " erow=\"{}\"", end.row)?;
614                        write!(&mut stdout, " ecol=\"{}\"", end.column)?;
615                        write!(&mut stdout, ">")?;
616                        tags.push(node.kind());
617                        needs_newline = true;
618                    }
619                    if cursor.goto_first_child() {
620                        did_visit_children = false;
621                        had_named_children = false;
622                        indent_level += 1;
623                    } else {
624                        did_visit_children = true;
625                        let start = node.start_byte();
626                        let end = node.end_byte();
627                        let value =
628                            std::str::from_utf8(&source_code[start..end]).expect("has a string");
629                        if !is_named && needs_newline {
630                            stdout.write_all(b"\n")?;
631                            for _ in 0..indent_level {
632                                stdout.write_all(b"  ")?;
633                            }
634                        }
635                        write!(&mut stdout, "{}", html_escape::encode_text(value))?;
636                    }
637                }
638            }
639            cursor.reset(tree.root_node());
640            println!();
641        }
642
643        if opts.output == ParseOutput::Dot {
644            util::print_tree_graph(&tree, "log.html", opts.open_log).unwrap();
645        }
646
647        let mut first_error = None;
648        let mut earliest_node_with_error = None;
649        'outer: loop {
650            let node = cursor.node();
651            if node.has_error() {
652                if earliest_node_with_error.is_none() {
653                    earliest_node_with_error = Some(node);
654                }
655                if node.is_error() || node.is_missing() {
656                    first_error = Some(node);
657                    break;
658                }
659
660                // If there's no more children, even though some outer node has an error,
661                // then that means that the first error is hidden, but the later error could be
662                // visible. So, we walk back up to the child of the first node with an error,
663                // and then check its siblings for errors.
664                if !cursor.goto_first_child() {
665                    let earliest = earliest_node_with_error.unwrap();
666                    while cursor.goto_parent() {
667                        if cursor.node().parent().is_some_and(|p| p == earliest) {
668                            while cursor.goto_next_sibling() {
669                                let sibling = cursor.node();
670                                if sibling.is_error() || sibling.is_missing() {
671                                    first_error = Some(sibling);
672                                    break 'outer;
673                                }
674                                if sibling.has_error() && cursor.goto_first_child() {
675                                    continue 'outer;
676                                }
677                            }
678                            break;
679                        }
680                    }
681                    break;
682                }
683            } else if !cursor.goto_next_sibling() {
684                break;
685            }
686        }
687
688        if first_error.is_some() || opts.print_time {
689            let path = path.to_string_lossy();
690            write!(
691                &mut stdout,
692                "{:width$}\tParse: {parse_duration_ms:>7.2} ms\t{:>6} bytes/ms",
693                name,
694                (source_code.len() as u128 * 1_000_000) / parse_duration.as_nanos(),
695                width = max_path_length
696            )?;
697            if let Some(node) = first_error {
698                let start = node.start_position();
699                let end = node.end_position();
700                let mut node_text = String::new();
701                for c in node.kind().chars() {
702                    if let Some(escaped) = escape_invisible(c) {
703                        node_text += escaped;
704                    } else {
705                        node_text.push(c);
706                    }
707                }
708                write!(&mut stdout, "\t(")?;
709                if node.is_missing() {
710                    if node.is_named() {
711                        write!(&mut stdout, "MISSING {node_text}")?;
712                    } else {
713                        write!(&mut stdout, "MISSING \"{node_text}\"")?;
714                    }
715                } else {
716                    write!(&mut stdout, "{node_text}")?;
717                }
718                write!(
719                    &mut stdout,
720                    " [{}, {}] - [{}, {}])",
721                    start.row, start.column, end.row, end.column
722                )?;
723            }
724            if !opts.edits.is_empty() {
725                write!(
726                    &mut stdout,
727                    "\n{:width$}\tEdit:  {edit_duration_ms:>7.2} ms",
728                    " ".repeat(path.len()),
729                    width = max_path_length,
730                )?;
731            }
732            writeln!(&mut stdout)?;
733        }
734
735        opts.stats.parse_summaries.push(ParseSummary {
736            file: path.to_path_buf(),
737            successful: first_error.is_none(),
738            start: Some(tree.root_node().start_position().into()),
739            end: Some(tree.root_node().end_position().into()),
740            duration: Some(parse_duration),
741            bytes: Some(source_code.len()),
742        });
743
744        return Ok(());
745    }
746    parser.stop_printing_dot_graphs();
747
748    if opts.print_time {
749        let duration = parse_time.elapsed();
750        let duration_ms = duration.as_micros() as f64 / 1e3;
751        writeln!(
752            &mut stdout,
753            "{:width$}\tParse: {duration_ms:>7.2} ms\t(timed out)",
754            path.to_str().unwrap(),
755            width = max_path_length
756        )?;
757    }
758
759    opts.stats.parse_summaries.push(ParseSummary {
760        file: path.to_path_buf(),
761        successful: false,
762        start: None,
763        end: None,
764        duration: None,
765        bytes: Some(source_code.len()),
766    });
767
768    Ok(())
769}
770
/// Returns the backslash escape sequence for a control character or a backslash, or
/// `None` when `c` can be printed unchanged.
const fn escape_invisible(c: char) -> Option<&'static str> {
    match c {
        '\n' => Some("\\n"),
        '\r' => Some("\\r"),
        '\t' => Some("\\t"),
        '\0' => Some("\\0"),
        '\\' => Some("\\\\"),
        '\x0b' => Some("\\v"),
        '\x0c' => Some("\\f"),
        _ => None,
    }
}
783
784fn render_node_text(source: &str) -> String {
785    source
786        .chars()
787        .fold(String::with_capacity(source.len()), |mut acc, c| {
788            if let Some(esc) = escape_invisible(c) {
789                acc.push_str(esc);
790            } else {
791                acc.push(c);
792            }
793            acc
794        })
795}
796
797fn write_node_text(
798    opts: &ParseFileOptions,
799    stdout: &mut StdoutLock<'static>,
800    cursor: &TreeCursor,
801    is_named: bool,
802    source: &str,
803    color: Option<impl Into<Color> + Copy>,
804    text_info: (usize, usize),
805) -> Result<()> {
806    let (total_width, indent_level) = text_info;
807    let (quote, quote_color) = if is_named {
808        ('`', opts.parse_theme.backtick)
809    } else {
810        ('\"', color.map(|c| c.into()))
811    };
812
813    if !is_named {
814        write!(
815            stdout,
816            "{}{}{}",
817            paint(quote_color, &String::from(quote)),
818            paint(color, &render_node_text(source)),
819            paint(quote_color, &String::from(quote)),
820        )?;
821    } else {
822        let multiline = source.contains('\n');
823        for (i, line) in source.split_inclusive('\n').enumerate() {
824            if line.is_empty() {
825                break;
826            }
827            let mut node_range = cursor.node().range();
828            // For each line of text, adjust the row by shifting it down `i` rows,
829            // and adjust the column by setting it to the length of *this* line.
830            node_range.start_point.row += i;
831            node_range.end_point.row = node_range.start_point.row;
832            node_range.end_point.column = line.len()
833                + if i == 0 {
834                    node_range.start_point.column
835                } else {
836                    0
837                };
838            let formatted_line = render_line_feed(line, opts);
839            if !opts.no_ranges {
840                write!(
841                    stdout,
842                    "{}{}{}{}{}{}",
843                    if multiline { "\n" } else { "" },
844                    if multiline {
845                        render_node_range(opts, cursor, is_named, true, total_width, node_range)
846                    } else {
847                        String::new()
848                    },
849                    if multiline {
850                        "  ".repeat(indent_level + 1)
851                    } else {
852                        String::new()
853                    },
854                    paint(quote_color, &String::from(quote)),
855                    &paint(color, &render_node_text(&formatted_line)),
856                    paint(quote_color, &String::from(quote)),
857                )?;
858            } else {
859                write!(
860                    stdout,
861                    "\n{}{}{}{}",
862                    "  ".repeat(indent_level + 1),
863                    paint(quote_color, &String::from(quote)),
864                    &paint(color, &render_node_text(&formatted_line)),
865                    paint(quote_color, &String::from(quote)),
866                )?;
867            }
868        }
869    }
870
871    Ok(())
872}
873
874fn render_line_feed(source: &str, opts: &ParseFileOptions) -> String {
875    if cfg!(windows) {
876        source.replace("\r\n", &paint(opts.parse_theme.line_feed, "\r\n"))
877    } else {
878        source.replace('\n', &paint(opts.parse_theme.line_feed, "\n"))
879    }
880}
881
882fn render_node_range(
883    opts: &ParseFileOptions,
884    cursor: &TreeCursor,
885    is_named: bool,
886    is_multiline: bool,
887    total_width: usize,
888    range: Range,
889) -> String {
890    let has_field_name = cursor.field_name().is_some();
891    let range_color = if is_named && !is_multiline && !has_field_name {
892        opts.parse_theme.row_color_named
893    } else {
894        opts.parse_theme.row_color
895    };
896
897    let remaining_width_start = (total_width
898        - (range.start_point.row as f64).log10() as usize
899        - (range.start_point.column as f64).log10() as usize)
900        .max(1);
901    let remaining_width_end = (total_width
902        - (range.end_point.row as f64).log10() as usize
903        - (range.end_point.column as f64).log10() as usize)
904        .max(1);
905    paint(
906        range_color,
907        &format!(
908            "{}:{}{:remaining_width_start$}- {}:{}{:remaining_width_end$}",
909            range.start_point.row,
910            range.start_point.column,
911            ' ',
912            range.end_point.row,
913            range.end_point.column,
914            ' ',
915        ),
916    )
917}
918
919fn cst_render_node(
920    opts: &ParseFileOptions,
921    cursor: &mut TreeCursor,
922    source_code: &[u8],
923    stdout: &mut StdoutLock<'static>,
924    total_width: usize,
925    indent_level: usize,
926    in_error: bool,
927) -> Result<()> {
928    let node = cursor.node();
929    let is_named = node.is_named();
930    if !opts.no_ranges {
931        write!(
932            stdout,
933            "{}",
934            render_node_range(opts, cursor, is_named, false, total_width, node.range())
935        )?;
936    }
937    write!(
938        stdout,
939        "{}{}",
940        "  ".repeat(indent_level),
941        if in_error && !node.has_error() {
942            " "
943        } else {
944            ""
945        }
946    )?;
947    if is_named {
948        if let Some(field_name) = cursor.field_name() {
949            write!(
950                stdout,
951                "{}",
952                paint(opts.parse_theme.field, &format!("{field_name}: "))
953            )?;
954        }
955
956        if node.has_error() || node.is_error() {
957            write!(stdout, "{}", paint(opts.parse_theme.error, "•"))?;
958        }
959
960        let kind_color = if node.is_error() {
961            opts.parse_theme.error
962        } else if node.is_extra() || node.parent().is_some_and(|p| p.is_extra() && !p.is_error()) {
963            opts.parse_theme.extra
964        } else {
965            opts.parse_theme.node_kind
966        };
967        write!(stdout, "{} ", paint(kind_color, node.kind()))?;
968
969        if node.child_count() == 0 {
970            // Node text from a pattern or external scanner
971            write_node_text(
972                opts,
973                stdout,
974                cursor,
975                is_named,
976                &String::from_utf8_lossy(&source_code[node.start_byte()..node.end_byte()]),
977                opts.parse_theme.node_text,
978                (total_width, indent_level),
979            )?;
980        }
981    } else if node.is_missing() {
982        write!(stdout, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?;
983        write!(
984            stdout,
985            "\"{}\"",
986            paint(opts.parse_theme.missing, node.kind())
987        )?;
988    } else {
989        // Terminal literals, like "fn"
990        write_node_text(
991            opts,
992            stdout,
993            cursor,
994            is_named,
995            node.kind(),
996            opts.parse_theme.literal,
997            (total_width, indent_level),
998        )?;
999    }
1000    writeln!(stdout)?;
1001
1002    Ok(())
1003}
1004
1005pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
1006    let start_byte = edit.position;
1007    let old_end_byte = edit.position + edit.deleted_length;
1008    let new_end_byte = edit.position + edit.inserted_text.len();
1009    let start_position = position_for_offset(input, start_byte)?;
1010    let old_end_position = position_for_offset(input, old_end_byte)?;
1011    input.splice(start_byte..old_end_byte, edit.inserted_text.iter().copied());
1012    let new_end_position = position_for_offset(input, new_end_byte)?;
1013    let edit = InputEdit {
1014        start_byte,
1015        old_end_byte,
1016        new_end_byte,
1017        start_position,
1018        old_end_position,
1019        new_end_position,
1020    };
1021    tree.edit(&edit);
1022    Ok(edit)
1023}
1024
1025fn parse_edit_flag(source_code: &[u8], flag: &str) -> Result<Edit> {
1026    let error = || {
1027        anyhow!(concat!(
1028            "Invalid edit string '{}'. ",
1029            "Edit strings must match the pattern '<START_BYTE_OR_POSITION> <REMOVED_LENGTH> <NEW_TEXT>'"
1030        ), flag)
1031    };
1032
1033    // Three whitespace-separated parts:
1034    // * edit position
1035    // * deleted length
1036    // * inserted text
1037    let mut parts = flag.split(' ');
1038    let position = parts.next().ok_or_else(error)?;
1039    let deleted_length = parts.next().ok_or_else(error)?;
1040    let inserted_text = parts.collect::<Vec<_>>().join(" ").into_bytes();
1041
1042    // Position can either be a byte_offset or row,column pair, separated by a comma
1043    let position = if position == "$" {
1044        source_code.len()
1045    } else if position.contains(',') {
1046        let mut parts = position.split(',');
1047        let row = parts.next().ok_or_else(error)?;
1048        let row = row.parse::<usize>().map_err(|_| error())?;
1049        let column = parts.next().ok_or_else(error)?;
1050        let column = column.parse::<usize>().map_err(|_| error())?;
1051        offset_for_position(source_code, Point { row, column })?
1052    } else {
1053        position.parse::<usize>().map_err(|_| error())?
1054    };
1055
1056    // Deleted length must be a byte count.
1057    let deleted_length = deleted_length.parse::<usize>().map_err(|_| error())?;
1058
1059    Ok(Edit {
1060        position,
1061        deleted_length,
1062        inserted_text,
1063    })
1064}
1065
1066pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
1067    let mut row = 0;
1068    let mut offset = 0;
1069    let mut iter = memchr::memchr_iter(b'\n', input);
1070    loop {
1071        if let Some(pos) = iter.next() {
1072            if row < position.row {
1073                row += 1;
1074                offset = pos;
1075                continue;
1076            }
1077        }
1078        offset += 1;
1079        break;
1080    }
1081    if position.row - row > 0 {
1082        return Err(anyhow!("Failed to address a row: {}", position.row));
1083    }
1084    if let Some(pos) = iter.next() {
1085        if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
1086            return Err(anyhow!("Failed to address a column: {}", position.column));
1087        }
1088    } else if input.len() - offset < position.column {
1089        return Err(anyhow!("Failed to address a column over the end"));
1090    }
1091    Ok(offset + position.column)
1092}
1093
1094pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
1095    if offset > input.len() {
1096        return Err(anyhow!("Failed to address an offset: {offset}"));
1097    }
1098    let mut result = Point { row: 0, column: 0 };
1099    let mut last = 0;
1100    for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
1101        result.row += 1;
1102        last = pos;
1103    }
1104    result.column = if result.row > 0 {
1105        offset - last - 1
1106    } else {
1107        offset
1108    };
1109    Ok(result)
1110}