mod common;
use common::{identity_test, IdentityFixtures};
use std::collections::HashMap;
use std::fs;
const FIXTURES_PATH: &str = "tests/sqlglot_fixtures";
#[test]
fn analyze_identity_failures() {
std::thread::Builder::new()
.stack_size(16 * 1024 * 1024) .spawn(|| {
analyze_identity_failures_impl();
})
.expect("Failed to spawn test thread")
.join()
.expect("Test thread panicked");
}
fn analyze_identity_failures_impl() {
let path = format!("{}/identity.json", FIXTURES_PATH);
let content = match fs::read_to_string(&path) {
Ok(c) => c,
Err(e) => {
println!("Failed to read fixtures: {}", e);
return;
}
};
let fixtures: IdentityFixtures = match serde_json::from_str(&content) {
Ok(f) => f,
Err(e) => {
println!("Failed to parse fixtures: {}", e);
return;
}
};
let mut categories: HashMap<String, Vec<(usize, String, String)>> = HashMap::new();
for test in &fixtures.tests {
let result = identity_test(&test.sql);
if let Err(msg) = result {
let category = categorize_error(&msg, &test.sql);
categories
.entry(category)
.or_default()
.push((test.line, test.sql.clone(), msg));
}
}
let mut sorted: Vec<_> = categories.iter().collect();
sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
println!("\n\n=== FAILURE ANALYSIS BY CATEGORY ===\n");
for (category, failures) in sorted {
println!("## {} ({} failures)", category, failures.len());
println!();
let show_count = if category == "Syntax error (expected token)" {
failures.len()
} else {
5
};
for (line, sql, msg) in failures.iter().take(show_count) {
println!("- Line {}: `{}`", line, truncate(sql, 80));
if category == "Syntax error (expected token)" {
println!(" Error: {}", truncate(msg, 100));
}
}
if failures.len() > show_count {
println!("- ... and {} more", failures.len() - show_count);
}
println!();
}
println!("\n=== SUMMARY TABLE ===\n");
println!("| Category | Count |");
println!("|----------|-------|");
let mut sorted: Vec<_> = categories.iter().collect();
sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
for (category, failures) in &sorted {
println!("| {} | {} |", category, failures.len());
}
let total_failures: usize = sorted.iter().map(|(_, v)| v.len()).sum();
println!("| **TOTAL** | **{}** |", total_failures);
}
fn categorize_error(msg: &str, sql: &str) -> String {
if msg.contains("Unexpected token: Amp") {
return "Bitwise AND operator (&)".to_string();
}
if msg.contains("Unexpected token: Pipe") {
return "Bitwise OR operator (|)".to_string();
}
if msg.contains("Unexpected token: Caret") {
return "Bitwise XOR operator (^)".to_string();
}
if msg.contains("Unexpected token: Tilde") {
return "Bitwise NOT operator (~)".to_string();
}
if msg.contains("Unexpected token: Lt") && msg.contains("Parse error") {
return "Bit shift operator (<<)".to_string();
}
if msg.contains("Unexpected token: Gt") && msg.contains("Parse error") {
return "Bit shift operator (>>)".to_string();
}
if msg.contains("Unexpected token: Update") {
return "FOR UPDATE clause".to_string();
}
if msg.contains("Unexpected token: End") {
return "END keyword in expression".to_string();
}
if msg.contains("Unexpected token: At") {
return "AT TIME ZONE / @ operator".to_string();
}
if msg.contains("Unexpected token: Hash") {
return "Hash operator (#)".to_string();
}
if msg.contains("Unexpected token: Arrow") {
return "Arrow operator (->)".to_string();
}
if msg.contains("Unexpected token: DArrow") {
return "Double arrow operator (->>)".to_string();
}
if msg.contains("Unexpected token: Colon") {
return "Colon in expression".to_string();
}
if msg.contains("Unexpected token: Dollar") {
return "Dollar sign ($)".to_string();
}
if msg.contains("Unexpected token: Question") {
return "Question mark operator (?)".to_string();
}
if msg.contains("Unexpected token: Parameter") {
return "Parameter placeholders".to_string();
}
if msg.contains("Unexpected token: Values") {
return "VALUES expression".to_string();
}
if msg.contains("Unexpected token: With") {
return "WITH clause parsing".to_string();
}
if msg.contains("Unexpected token: Extract") {
return "EXTRACT function".to_string();
}
if msg.contains("Unexpected token: Interval") {
return "INTERVAL expressions".to_string();
}
if msg.contains("Unexpected token: Date") {
return "DATE literals/functions".to_string();
}
if msg.contains("Unexpected token: Timestamp") {
return "TIMESTAMP literals/functions".to_string();
}
if msg.contains("Unexpected token: Time") {
return "TIME literals/functions".to_string();
}
if msg.contains("Unexpected token: Array") {
return "ARRAY expressions".to_string();
}
if msg.contains("Unexpected token: Map") {
return "MAP expressions".to_string();
}
if msg.contains("Unexpected token: Struct") {
return "STRUCT expressions".to_string();
}
if msg.contains("Unexpected token: Lambda") {
return "Lambda expressions".to_string();
}
if msg.contains("Unexpected token: If") {
return "IF expressions".to_string();
}
if msg.contains("Unexpected token: Into") {
return "INTO clause".to_string();
}
if msg.contains("Unexpected token: Using") {
return "USING clause".to_string();
}
if msg.contains("Unexpected token: Over") {
return "OVER clause parsing".to_string();
}
if msg.contains("Unexpected token: Partition") {
return "PARTITION BY parsing".to_string();
}
if msg.contains("Unexpected token: Rows") || msg.contains("Unexpected token: Range") {
return "Window frame specification".to_string();
}
if msg.contains("Unexpected token: Preceding") || msg.contains("Unexpected token: Following") {
return "Window frame bounds".to_string();
}
if msg.contains("Unexpected token: Nulls") {
return "NULLS FIRST/LAST".to_string();
}
if msg.contains("Unexpected token: Filter") {
return "FILTER clause".to_string();
}
if msg.contains("Unexpected token: Within") {
return "WITHIN GROUP".to_string();
}
if msg.contains("Unexpected token: Format") {
return "FORMAT clause".to_string();
}
if msg.contains("Unexpected token: Trim") {
return "TRIM function".to_string();
}
if msg.contains("Unexpected token: Substring") {
return "SUBSTRING function".to_string();
}
if msg.contains("Unexpected token: Position") {
return "POSITION function".to_string();
}
if msg.contains("Unexpected token: Overlay") {
return "OVERLAY function".to_string();
}
if msg.contains("Unexpected token: Collate") {
return "COLLATE clause".to_string();
}
if msg.contains("Unexpected token: Set") {
return "SET operations".to_string();
}
if msg.contains("Unexpected token: Escape") {
return "ESCAPE clause".to_string();
}
if msg.contains("Unexpected token: Recursive") {
return "RECURSIVE CTE".to_string();
}
if msg.contains("Unexpected token: Lateral") {
return "LATERAL keyword".to_string();
}
if msg.contains("Unexpected token: Tablesample") {
return "TABLESAMPLE clause".to_string();
}
if msg.contains("Unexpected token: Natural") {
return "NATURAL JOIN".to_string();
}
if msg.contains("Unexpected token: Global") {
return "GLOBAL keyword".to_string();
}
if msg.contains("Unexpected token: Match") {
return "MATCH expressions".to_string();
}
if msg.contains("Unexpected token: Glob") {
return "GLOB operator".to_string();
}
if msg.contains("Unexpected token: Regexp") || msg.contains("Unexpected token: Rlike") {
return "REGEXP/RLIKE operators".to_string();
}
if msg.contains("Unexpected token: Similar") {
return "SIMILAR TO operator".to_string();
}
if msg.contains("Unexpected token: Ilike") {
return "ILIKE operator".to_string();
}
if msg.contains("Unexpected token: Explain") {
return "EXPLAIN statement".to_string();
}
if msg.contains("Unexpected token: Describe") {
return "DESCRIBE statement".to_string();
}
if msg.contains("Unexpected token: Show") {
return "SHOW statement".to_string();
}
if msg.contains("Unexpected token: Pragma") {
return "PRAGMA statement".to_string();
}
if msg.contains("Unexpected token: Fetch") {
return "FETCH clause".to_string();
}
if msg.contains("Unexpected token: Lock") {
return "LOCK clause".to_string();
}
if msg.contains("Mismatch") {
if sql.contains("\"\"\"") || sql.contains("'''") {
return "Triple-quoted strings".to_string();
}
if sql.contains("$$") {
return "Dollar-quoted strings".to_string();
}
if sql.contains("\\") {
return "Escape sequences in strings".to_string();
}
if sql.contains("::") {
return "PostgreSQL cast operator (::)".to_string();
}
if sql.to_uppercase().contains("DISTINCT ON") {
return "DISTINCT ON clause".to_string();
}
if sql.to_uppercase().contains("GROUP BY ALL") {
return "GROUP BY ALL".to_string();
}
if sql.to_uppercase().contains("ORDER BY ALL") {
return "ORDER BY ALL".to_string();
}
if sql.contains("/*") || sql.contains("--") {
return "Comment handling".to_string();
}
return "Output mismatch (other)".to_string();
}
if msg.contains("Unexpected token:") {
let token = msg.split("Unexpected token:").nth(1).unwrap_or("Unknown");
let token = token.trim().split_whitespace().next().unwrap_or("Unknown");
return format!("Unexpected token: {}", token);
}
if msg.contains("Expected") {
return "Syntax error (expected token)".to_string();
}
"Other error".to_string()
}
fn truncate(s: &str, max_len: usize) -> String {
if s.len() <= max_len {
s.to_string()
} else {
format!("{}...", &s[..max_len - 3])
}
}