use cirru_parser::Cirru;
use std::sync::Arc;
/// Validates every leaf of a Cirru tree, starting from `node` as the root.
///
/// The walk starts with an empty index path and outside of any comment.
///
/// # Errors
/// Returns the message of the first leaf that fails validation; the message
/// includes the index path of that leaf (or `root` when `node` itself is the
/// offending leaf).
pub fn validate_cirru_syntax(node: &Cirru) -> Result<(), String> {
    let mut path: Vec<usize> = Vec::new();
    validate_node_recursive(node, &mut path, false)
}
/// Depth-first validation of `node`, tracking the child-index `path` from the root.
///
/// * A leaf is handed to `validate_leaf` together with the current `path` and
///   the `in_comment` flag.
/// * A list whose first element is the leaf `;` is treated as a comment: every
///   element after that first one is visited with `in_comment = true`. All
///   other lists visit their children with `in_comment = false`; the flag
///   received by the list itself is not forwarded to its children.
///
/// # Errors
/// Stops at the first failing leaf and returns its message. On that early
/// return the index pushed for the failing child is left on `path`.
fn validate_node_recursive(node: &Cirru, path: &mut Vec<usize>, in_comment: bool) -> Result<(), String> {
    let items = match node {
        Cirru::Leaf(s) => return validate_leaf(s, path, in_comment),
        Cirru::List(items) => items,
    };

    // A list is a comment when it is headed by the bare `;` leaf.
    let is_comment = matches!(items.first(), Some(Cirru::Leaf(head)) if &**head == ";");

    for (idx, item) in items.iter().enumerate() {
        path.push(idx);
        // The `;` marker itself (index 0) is not part of the comment body.
        validate_node_recursive(item, path, is_comment && idx > 0)?;
        path.pop();
    }
    Ok(())
}
/// Checks a single leaf token against the syntax rule for its kind.
///
/// Rules are tried in this order and the first applicable one decides:
/// 1. An empty leaf is accepted.
/// 2. A leaf starting with `:` (tag) or `'` (quoted symbol) must not contain a space.
/// 3. A leaf starting with `|`, `"` or `;` is accepted as-is.
/// 4. Any other leaf containing `(` or `)` is rejected, even inside a comment.
/// 5. Any remaining leaf inside a comment (`in_comment`) is accepted.
/// 6. A leaf starting with a digit, or with `+`/`-` followed by a digit, must
///    satisfy `is_valid_number`.
/// 7. Any other leaf containing a space is rejected.
///
/// `path` is only used to describe the leaf's location in error messages.
///
/// # Errors
/// Returns a multi-line message naming the location, the offending text and a
/// hint on how to fix it.
fn validate_leaf(s: &Arc<str>, path: &[usize], in_comment: bool) -> Result<(), String> {
    let text: &str = s;
    let mut chars = text.chars();
    let first_char = match chars.next() {
        Some(c) => c,
        None => return Ok(()),
    };
    let has_space = text.contains(' ');

    // Prefix-driven token kinds are fully decided by their first character.
    match first_char {
        ':' => {
            return if has_space {
                Err(format!(
                    "Invalid tag at path [{}]: Tags cannot contain spaces\n\
                     Found: {:?}\n\
                     Hint: Tags like :tag should be single tokens without spaces; if this is literal text, use a string such as |text or \"|text with spaces\"",
                    format_path(path),
                    text
                ))
            } else {
                Ok(())
            };
        }
        '\'' => {
            return if has_space {
                Err(format!(
                    "Invalid symbol at path [{}]: Symbols cannot contain spaces\n\
                     Found: {:?}\n\
                     Hint: Symbols like 'atom should be single tokens without spaces",
                    format_path(path),
                    text
                ))
            } else {
                Ok(())
            };
        }
        '|' | '"' | ';' => return Ok(()),
        _ => {}
    }

    // Parentheses are checked before the comment exemption on purpose:
    // they are rejected in comment bodies as well.
    if text.chars().any(|c| c == '(' || c == ')') {
        return Err(format!(
            "Invalid leaf node at path [{}]: Contains parentheses which are structural characters\n\
             Found: {:?}\n\
             Hint: Parentheses ( ) are only for list structure, not leaf content; if you need literal parentheses in text, wrap the value as a string such as |text(with-parens) or \"|text with (parens)\"",
            format_path(path),
            text
        ));
    }

    if in_comment {
        return Ok(());
    }

    if first_char.is_ascii_digit() {
        return if is_valid_number(text) {
            Ok(())
        } else {
            Err(format!(
                "Invalid number format at path [{}]: Starts with digit but cannot be parsed as number\n\
                 Found: {:?}\n\
                 Hint: Valid formats include: 123, -456, 3.14, 1e10, 0x1F, 0b1010, 0o77; if this is a literal token rather than a number, wrap it as a string",
                format_path(path),
                text
            ))
        };
    }

    // A sign only makes the leaf number-like when a digit follows directly;
    // bare operators such as `+`, `-` or `->` fall through.
    if first_char == '+' || first_char == '-' {
        if let Some(second_char) = chars.next().filter(|c| c.is_ascii_digit()) {
            return if is_valid_number(text) {
                Ok(())
            } else {
                Err(format!(
                    "Invalid number format at path [{}]: Starts with {}{} but cannot be parsed as number\n\
                     Found: {:?}\n\
                     Hint: Valid formats include: +123, -456, +3.14, -1e10; if this is not meant to be a number, write it as a string instead of a bare leaf",
                    format_path(path),
                    first_char,
                    second_char,
                    text
                ))
            };
        }
    }

    if has_space {
        return Err(format!(
            "Suspicious leaf node at path [{}]: Contains spaces but is not a string\n\
             Found: {:?}\n\
             Hint: If this is meant to be a string, prefix with | for simple text, or use \"|...\" for one-line text with spaces/special characters\n\
             If it's multiple tokens, it should be a list (separate expressions)",
            format_path(path),
            text
        ));
    }

    Ok(())
}
/// Returns `true` when `text` is a well-formed numeric literal.
///
/// Accepted forms:
/// * anything `i64` or `u64` can parse (e.g. `123`, `-456`, `+7`);
/// * `0x`/`0X` followed by one or more hexadecimal digits;
/// * `0b`/`0B` followed by one or more binary digits;
/// * `0o`/`0O` followed by one or more octal digits;
/// * anything `f64` can parse, which covers decimals and scientific notation
///   with either `e` or `E` (e.g. `3.14`, `1e10`, `1.5e-3`, `1E5`).
///
/// Note that `f64` parsing also accepts the keywords `inf`, `infinity` and
/// `nan`; `validate_leaf` only calls this for text that starts with a digit
/// or a sign followed by a digit, so those keywords never reach this point
/// from that caller.
fn is_valid_number(text: &str) -> bool {
    /// True when `text` is one of `prefixes` followed by at least one
    /// character, all of which satisfy `is_digit`.
    fn has_radix_digits(text: &str, prefixes: [&str; 2], is_digit: fn(char) -> bool) -> bool {
        prefixes
            .iter()
            .find_map(|prefix| text.strip_prefix(*prefix))
            .map_or(false, |digits| !digits.is_empty() && digits.chars().all(is_digit))
    }

    if text.parse::<i64>().is_ok() || text.parse::<u64>().is_ok() {
        return true;
    }

    if has_radix_digits(text, ["0x", "0X"], |c: char| c.is_ascii_hexdigit())
        || has_radix_digits(text, ["0b", "0B"], |c: char| matches!(c, '0' | '1'))
        || has_radix_digits(text, ["0o", "0O"], |c: char| matches!(c, '0'..='7'))
    {
        return true;
    }

    // `f64::from_str` already understands exponents in both cases. A manual
    // split on `e`/`E` must not be layered on top of it: doing so accepted
    // malformed input such as "1E5e3" (base "1E5" + exponent "3") or "infe5".
    text.parse::<f64>().is_ok()
}
/// Renders an index path for error messages.
///
/// An empty path is shown as `root`; otherwise the indices are written in
/// order, separated by commas with no spaces (e.g. `1,2,3`).
fn format_path(path: &[usize]) -> String {
    let (head, tail) = match path.split_first() {
        Some(parts) => parts,
        None => return String::from("root"),
    };

    let mut rendered = head.to_string();
    for index in tail {
        rendered.push(',');
        rendered.push_str(&index.to_string());
    }
    rendered
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a leaf node holding `s`.
    fn leaf(s: &str) -> Cirru {
        Cirru::Leaf(Arc::from(s))
    }

    /// Builds a list node from `items`.
    fn list(items: Vec<Cirru>) -> Cirru {
        Cirru::List(items)
    }

    /// Builds the comment list `(; <body>)`.
    fn comment(body: &str) -> Cirru {
        list(vec![leaf(";"), leaf(body)])
    }

    /// Asserts that a lone leaf with the given text passes validation.
    fn assert_leaf_ok(text: &str) {
        assert!(validate_cirru_syntax(&leaf(text)).is_ok());
    }

    /// Asserts that a lone leaf with the given text is rejected and returns
    /// the error message for further inspection.
    fn leaf_error(text: &str) -> String {
        let result = validate_cirru_syntax(&leaf(text));
        assert!(result.is_err());
        result.unwrap_err()
    }

    #[test]
    fn test_valid_tags() {
        for text in &[":tag", ":event/click", ":ns/def"] {
            assert_leaf_ok(text);
        }
    }

    #[test]
    fn test_invalid_tags_with_spaces() {
        let message = leaf_error(":tag with space");
        assert!(message.contains("Tags cannot contain spaces"));
    }

    #[test]
    fn test_valid_quoted_symbols() {
        for text in &["'atom", "'my-symbol", "'x"] {
            assert_leaf_ok(text);
        }
    }

    #[test]
    fn test_invalid_symbols_with_spaces() {
        let message = leaf_error("'a b");
        assert!(message.contains("Symbols cannot contain spaces"));
    }

    #[test]
    fn test_invalid_parentheses_in_leaves() {
        let message = leaf_error("hello(world)");
        assert!(message.contains("parentheses"));

        leaf_error("(bad)");
        leaf_error("test)");
    }

    #[test]
    fn test_valid_strings_with_parentheses() {
        assert_leaf_ok("|hello (world)");
        assert_leaf_ok("\"text (with) parens\"");
    }

    #[test]
    fn test_comments_allow_flexible_content() {
        // Free text with spaces is fine inside a comment.
        assert!(validate_cirru_syntax(&comment("this is a comment")).is_ok());
        assert!(validate_cirru_syntax(&comment("测试 1: something")).is_ok());

        // Parentheses stay structural even inside a comment.
        assert!(validate_cirru_syntax(&comment("bad (comment)")).is_err());
    }

    #[test]
    fn test_valid_strings() {
        for text in &["|hello world", "\"hello world\"", "|text with spaces"] {
            assert_leaf_ok(text);
        }
    }

    #[test]
    fn test_valid_numbers() {
        for text in &["123", "-456", "3.14", "1e10", "1.5e-3", "0x1F", "0b1010", "0o77"] {
            assert_leaf_ok(text);
        }
    }

    #[test]
    fn test_invalid_numbers() {
        let message = leaf_error("123abc");
        assert!(message.contains("cannot be parsed as number"));
    }

    #[test]
    fn test_invalid_non_numeric_token_starting_with_digits_has_string_hint() {
        let message = leaf_error("100vh");
        assert!(message.contains("literal token rather than a number"));
    }

    #[test]
    fn test_parentheses_hint_mentions_literal_parentheses_strings() {
        let message = leaf_error("text(with-parens)");
        assert!(message.contains("literal parentheses in text"));
        assert!(message.contains("|text(with-parens)"));
    }

    #[test]
    fn test_parentheses_hint_mentions_string_wrapping() {
        let message = leaf_error("text with (parens)");
        assert!(message.contains("wrap the value as a string") || message.contains("string such as"));
        assert!(message.contains("\"|text with (parens)\""));
    }

    #[test]
    fn test_valid_symbols() {
        for text in &["defn", "valid?", "add!", "+", "->", "&+"] {
            assert_leaf_ok(text);
        }
    }

    #[test]
    fn test_suspicious_leaf_with_spaces() {
        let message = leaf_error("hello world");
        assert!(message.contains("Contains spaces but is not a string"));
    }

    #[test]
    fn test_valid_nested_structure() {
        let params = list(vec![leaf("a"), leaf("b")]);
        let body = list(vec![leaf("+"), leaf("a"), leaf("b")]);
        let tree = list(vec![leaf("defn"), leaf("add"), params, body]);

        assert!(validate_cirru_syntax(&tree).is_ok());
    }

    #[test]
    fn test_invalid_nested_structure() {
        let params = list(vec![leaf("a"), leaf("b")]);
        let tree = list(vec![leaf("defn"), leaf("my func"), params]);

        let result = validate_cirru_syntax(&tree);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("Contains spaces but is not a string"));
    }
}