use clap::Parser as ClapParser;
/// Statistics about the longest prefix of the input that parses successfully,
/// as filled in by `find_last_good_parse`.
#[derive(Debug)]
struct ParseResult {
/// Byte offset into the original content at which the parseable prefix ends.
last_position: usize,
/// Length of the whole input in bytes (`content.len()`).
#[allow(dead_code)]
total_bytes: usize,
/// Number of bytes in the parseable prefix (same value as `last_position`).
parsed_bytes: usize,
/// `total_bytes - parsed_bytes`.
remaining_bytes: usize,
/// Number of lines in the whole input, counted with `str::lines`.
#[allow(dead_code)]
total_lines: usize,
/// Number of lines in the parseable prefix, counted with `str::lines`.
parsed_lines: usize,
/// `total_lines - parsed_lines`.
remaining_lines: usize,
/// Owned copy of the prefix that parsed (`content[..last_position]`).
truncated_content: String,
/// Owned copy of everything after the prefix (`content[last_position..]`).
unparsed_content: String,
}
use pest::Parser;
use std::fs;
use std::path::PathBuf;
use ustar_parser::parsers::ascii::{AsciiParser, Rule};
use ustar_parser::{default_config, parse, ConfigKey, ConfigValue, ErrorFormatMode};
// Command-line arguments, parsed by clap's derive API.
// Plain `//` comments are used on purpose: `///` doc comments on clap-derived
// items are picked up as help text and would change the `--help` output.
#[derive(ClapParser, Debug)]
#[command(name = "ustar-parse-debugger")]
#[command(about = "Debug STAR file parsing by finding the last parseable position")]
struct Args {
// Positional argument: path of the file to read and parse.
#[arg(value_name = "FILE")]
input: PathBuf,
// Flag (short/long names derived from the field name by clap): when set, the
// parse tree is dumped with `{:#?}` instead of the one-line-per-node summary.
#[arg(short, long)]
full_tree: bool,
// Flag: when set, whitespace in the unparsed content is rendered with visible
// marker glyphs; `main` passes the negation as `no_visible_whitespace`.
#[arg(short, long)]
whitespace: bool,
}
fn main() {
let args = Args::parse();
let content = match fs::read_to_string(&args.input) {
Ok(c) => c,
Err(e) => {
eprintln!("Error reading file {:?}: {}", args.input, e);
std::process::exit(1);
}
};
println!("uSTAR Parse Debugger\n");
let mut config = default_config();
config.insert(
ConfigKey::ErrorFormat,
ConfigValue::ErrorFormat(ErrorFormatMode::Fancy),
);
config.insert(ConfigKey::ContextLines, ConfigValue::Usize(10));
let stored_result = match parse(&content, &config) {
Ok(_) => {
println!("✓ File parses successfully!");
std::process::exit(0);
}
Err(e) => {
let result = {
match AsciiParser::parse(Rule::star_file, &content) {
Ok(_) => {
println!(
"Unexpected: direct pest parser succeeded where new system failed"
);
std::process::exit(0);
}
Err(pest_error) => {
let error_pos = match pest_error.location {
pest::error::InputLocation::Pos(pos) => pos,
pest::error::InputLocation::Span((start, _)) => start,
};
let (error_line, _error_col) = match pest_error.line_col {
pest::error::LineColLocation::Pos((line, col)) => (line, col),
pest::error::LineColLocation::Span((line, col), _) => (line, col),
};
println!("\n=== Attempting to find last parseable position ===\n");
let parse_result = find_last_good_parse(&content, error_pos);
if let Some(result) = parse_result {
display_parse_debug_info(
&content,
error_line,
&result,
args.full_tree,
!args.whitespace,
);
Some(result)
} else {
println!(" Could not find a successful parse point.");
println!(" The file may have fundamental syntax errors near the beginning.");
None
}
}
}
};
println!("\n=== Error Message ===\n");
println!("{}\n", e.format_error(ErrorFormatMode::Fancy, 10));
result
}
};
println!("✗ Parse failed\n");
if let Some(result) = stored_result {
println!("Last parsed position: {} bytes", result.last_position);
println!("Bytes parsed: {} bytes", result.parsed_bytes);
println!("Bytes remaining: {} bytes", result.remaining_bytes);
println!("Lines parsed: {} lines", result.parsed_lines);
println!("Lines remaining: {} lines\n", result.remaining_lines);
} else {
println!("No successful parse position could be determined.\n");
}
println!("File: {}", args.input.display());
println!("Size: {} bytes", content.len());
}
/// Searches backwards from `error_pos` for the longest prefix of `content`
/// that `AsciiParser` accepts as a complete `star_file`.
///
/// Each attempt steps back to the nearest preceding ASCII whitespace byte
/// (space, tab, LF or CR), then past the whole run of whitespace in front of
/// it, and tries to parse everything before that point. The first prefix that
/// parses is returned together with byte and line statistics.
///
/// Returns `None` when no whitespace boundary is left, when the candidate
/// prefix would be empty, or after 10000 failed attempts.
fn find_last_good_parse(content: &str, error_pos: usize) -> Option<ParseResult> {
    const MAX_ATTEMPTS: usize = 10000;

    let is_blank = |b: &u8| matches!(*b, b' ' | b'\t' | b'\n' | b'\r');
    let bytes = content.as_bytes();
    let mut cursor = error_pos.min(bytes.len());

    for _ in 0..MAX_ATTEMPTS {
        // Nearest whitespace byte strictly before the cursor; none left => give up.
        let blank_at = bytes[..cursor].iter().rposition(is_blank)?;

        // Back up over the whole whitespace run so the prefix ends on a
        // non-whitespace byte (an ASCII match is always a char boundary).
        cursor = bytes[..blank_at]
            .iter()
            .rposition(|b| !is_blank(b))
            .map_or(0, |last_solid| last_solid + 1);
        if cursor == 0 {
            return None;
        }

        let truncated = &content[..cursor];
        if AsciiParser::parse(Rule::star_file, truncated).is_err() {
            continue;
        }

        let total_bytes = content.len();
        let total_lines = content.lines().count();
        let parsed_lines = truncated.lines().count();
        return Some(ParseResult {
            last_position: cursor,
            total_bytes,
            parsed_bytes: cursor,
            remaining_bytes: total_bytes - cursor,
            total_lines,
            parsed_lines,
            remaining_lines: total_lines - parsed_lines,
            truncated_content: truncated.to_string(),
            unparsed_content: content[cursor..].to_string(),
        });
    }

    None
}
/// Prints the debug view for a successful prefix parse.
///
/// Output consists of the byte position of the last good parse, the parse tree
/// of the prefix (a `{:#?}` dump when `show_full_tree` is set, otherwise the
/// per-node summary), and the unparsed content from the end of the prefix
/// through the line on which the parser reported the error.
///
/// * `content` - the complete input; used for line lookups in the tree summary.
/// * `error_line` - 1-based line of the parse error.
/// * `result` - prefix/remainder information for the last good parse.
/// * `show_full_tree` - dump the full pest tree instead of the summary.
/// * `no_visible_whitespace` - print unparsed lines plainly (line endings
///   stripped) instead of with whitespace marker glyphs.
fn display_parse_debug_info(
    content: &str,
    error_line: usize,
    result: &ParseResult,
    show_full_tree: bool,
    no_visible_whitespace: bool,
) {
    println!(
        " ✓ Found successful parse at byte position: {}\n",
        result.last_position
    );

    match AsciiParser::parse(Rule::star_file, &result.truncated_content) {
        Ok(pairs) => {
            if show_full_tree {
                println!("=== Full Parse Tree ===\n");
                for pair in pairs {
                    println!("{:#?}", pair);
                }
            } else {
                println!("=== Successful Parse Tree ===\n");
                for pair in pairs {
                    print_tree_summary(&pair, 0, content);
                }
            }
        }
        Err(_) => {
            println!("Error: Could not re-parse truncated content for tree display");
        }
    }

    println!();
    println!("=== Unparsed Content ===\n");

    let remaining_lines: Vec<&str> = split_lines_with_endings(&result.unparsed_content);
    if remaining_lines.is_empty() {
        println!(" (No remaining content - file ends cleanly)");
        return;
    }

    // 1-based line on which the first unparsed byte sits. When the prefix stops
    // mid-line (the usual case: it ends on a non-whitespace byte) this is the
    // last parsed line itself, so the first segment shown is the tail of that
    // line and must not be labelled as the following line.
    let first_unparsed_line = result
        .truncated_content
        .bytes()
        .filter(|&b| b == b'\n')
        .count()
        + 1;

    // Show everything up to and including the error line. `saturating_sub`
    // guards against an error line that precedes the prefix end, and the `min`
    // keeps the announced count equal to what is actually printed.
    let total_lines_to_show = (error_line + 1)
        .saturating_sub(first_unparsed_line)
        .min(remaining_lines.len());

    println!(
        " Showing unparsed content ({} lines):\n",
        total_lines_to_show
    );
    for (i, line) in remaining_lines.iter().take(total_lines_to_show).enumerate() {
        let line_num = first_unparsed_line + i;
        let visible_line = make_whitespace_visible(line, no_visible_whitespace);
        println!(" {:4}: {}", line_num, visible_line);
    }
    println!();
}
/// Splits `content` into lines, keeping each line's terminator attached.
///
/// `"\n"`, `"\r\n"` and a lone `"\r"` each end a line. A trailing fragment
/// without a terminator is returned as the final element; empty input yields
/// an empty vector.
fn split_lines_with_endings(content: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut rest = content;

    while let Some(idx) = rest.find(|c: char| c == '\r' || c == '\n') {
        // A CR immediately followed by LF is a single two-byte terminator.
        let end = if rest[idx..].starts_with("\r\n") {
            idx + 2
        } else {
            idx + 1
        };
        let (line, tail) = rest.split_at(end);
        pieces.push(line);
        rest = tail;
    }

    if !rest.is_empty() {
        pieces.push(rest);
    }
    pieces
}
/// Renders one line for display.
///
/// When `no_visible_whitespace` is true, the line is returned with trailing
/// CR/LF characters removed and nothing else changed. Otherwise every space,
/// tab, CR and LF is replaced by a marker glyph wrapped in the ANSI
/// bright-black (`ESC[90m` … `ESC[0m`) colour sequence, and all other
/// characters are copied through unchanged.
fn make_whitespace_visible(line: &str, no_visible_whitespace: bool) -> String {
    if no_visible_whitespace {
        return line
            .trim_end_matches(|c: char| c == '\r' || c == '\n')
            .to_owned();
    }

    let mut rendered = String::new();
    for ch in line.chars() {
        let marker = match ch {
            ' ' => Some("\x1b[90m·\x1b[0m"),
            '\t' => Some("\x1b[90m→\x1b[0m"),
            '\r' => Some("\x1b[90m␍\x1b[0m"),
            '\n' => Some("\x1b[90m␊\x1b[0m"),
            _ => None,
        };
        match marker {
            Some(glyph) => rendered.push_str(glyph),
            None => rendered.push(ch),
        }
    }
    rendered
}
/// Returns the deepest nesting level reached in the subtree rooted at `pair`,
/// where `pair` itself is at `current_depth` and each level of inner pairs
/// adds one.
fn calculate_max_depth(pair: &pest::iterators::Pair<Rule>, current_depth: usize) -> usize {
    pair.clone()
        .into_inner()
        .map(|child| calculate_max_depth(&child, current_depth + 1))
        .fold(current_depth, |deepest, depth| deepest.max(depth))
}
/// Returns the 1-based line number of byte offset `byte_pos` in `content`.
///
/// The line number is one more than the number of `'\n'` bytes before the
/// offset. Offsets past the end are clamped to `content.len()`.
///
/// Counting is done on the raw bytes rather than on a `str` slice, so an
/// offset that falls inside a multi-byte UTF-8 character does not panic
/// (`'\n'` never occurs inside a multi-byte sequence, so the count is exact).
fn byte_to_line(content: &str, byte_pos: usize) -> usize {
    let end = byte_pos.min(content.len());
    content.as_bytes()[..end]
        .iter()
        .filter(|&&b| b == b'\n')
        .count()
        + 1
}
/// Returns the largest 1-based line number (within `content`) at which any
/// span in the subtree rooted at `pair` ends.
fn find_max_line(pair: &pest::iterators::Pair<Rule>, content: &str) -> usize {
    let own_last_line = byte_to_line(content, pair.as_span().end());
    pair.clone()
        .into_inner()
        .map(|child| find_max_line(&child, content))
        .fold(own_last_line, |latest, line| latest.max(line))
}
/// Prints the summary of the parse tree rooted at `pair` using the default
/// left margin of four spaces. See `print_tree_summary_with_indent`.
fn print_tree_summary(pair: &pest::iterators::Pair<Rule>, depth: usize, content: &str) {
    const DEFAULT_INDENT_SPACES: usize = 4;
    print_tree_summary_with_indent(pair, depth, content, DEFAULT_INDENT_SPACES);
}
fn print_tree_summary_with_indent(
pair: &pest::iterators::Pair<Rule>,
depth: usize,
content: &str,
indent_spaces: usize,
) {
let max_depth = calculate_max_depth(pair, depth);
let max_line = find_max_line(pair, content);
let mut tree_lines = Vec::new();
let mut text_lines = Vec::new();
build_tree_lines(
pair,
depth,
max_depth,
content,
max_line,
&mut tree_lines,
&mut text_lines,
);
let max_tree_width = tree_lines.iter().map(|s| s.len()).max().unwrap_or(0);
let indent = " ".repeat(indent_spaces);
for (tree_line, text_line) in tree_lines.iter().zip(text_lines.iter()) {
println!(
"{}{:<width$} \"{}\"",
indent,
tree_line,
text_line,
width = max_tree_width
);
}
}
/// Formats a token's text for single-line display.
///
/// Text longer than 60 characters is shortened to its first 30 and last 30
/// characters joined by `...`; newline, carriage return and tab are then shown
/// as the escape sequences `\n`, `\r` and `\t`.
fn abbreviate_token_text(text: &str) -> String {
    const EDGE_CHARS: usize = 30;

    // Decide on the character count, not the byte length: the head and tail
    // are cut in characters, so a byte-length test would make them overlap for
    // short non-ASCII text.
    let shown = if text.chars().count() > 2 * EDGE_CHARS {
        let head_end = text
            .char_indices()
            .nth(EDGE_CHARS)
            .map_or(text.len(), |(idx, _)| idx);
        let tail_start = text
            .char_indices()
            .rev()
            .nth(EDGE_CHARS - 1)
            .map_or(0, |(idx, _)| idx);
        format!("{}...{}", &text[..head_end], &text[tail_start..])
    } else {
        text.to_string()
    };

    shown
        .replace('\n', "\\n")
        .replace('\r', "\\r")
        .replace('\t', "\\t")
}

/// Appends one row per node of the subtree rooted at `pair`, in pre-order, to
/// `tree_lines` (structural description) and `text_lines` (token text).
///
/// The two vectors always grow together, so index `i` in one corresponds to
/// index `i` in the other.
///
/// * `depth` - depth of `pair`; children are recorded at `depth + 1`.
/// * `max_depth` / `max_line` - largest depth and line number in the whole
///   tree; only their decimal widths are used, to right-align the columns.
/// * `content` - text used to translate span offsets into line numbers.
fn build_tree_lines(
    pair: &pest::iterators::Pair<Rule>,
    depth: usize,
    max_depth: usize,
    content: &str,
    max_line: usize,
    tree_lines: &mut Vec<String>,
    text_lines: &mut Vec<String>,
) {
    let span = pair.as_span();
    let start_line = byte_to_line(content, span.start());
    let end_line = byte_to_line(content, span.end());
    let line_width = max_line.to_string().len();
    let depth_width = max_depth.to_string().len();

    tree_lines.push(format!(
        "{:>line_width$} | {:>depth_width$} | {}{:?} ({}-{}) lines {}-{}",
        start_line,
        depth,
        " ".repeat(depth),
        pair.as_rule(),
        span.start(),
        span.end(),
        start_line,
        end_line,
        line_width = line_width,
        depth_width = depth_width
    ));
    text_lines.push(abbreviate_token_text(span.as_str()));

    for inner in pair.clone().into_inner() {
        build_tree_lines(
            &inner,
            depth + 1,
            max_depth,
            content,
            max_line,
            tree_lines,
            text_lines,
        );
    }
}