use crate::error::{AgmError, ErrorCode, ErrorLocation};
/// Classification of one physical source line, as decided by `classify_line`.
///
/// String payloads are whitespace-trimmed unless a variant says otherwise.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LineKind {
    /// Empty line, or a line containing only whitespace.
    Blank,
    /// Line whose first non-whitespace character is `#` and which is not a
    /// `# expect:` header.
    Comment,
    /// `node <id>`; carries the trimmed id (empty for a bare `node`).
    NodeDeclaration(String),
    /// `key: value`; carries the key and the trimmed, non-empty value.
    ScalarField(String, String),
    /// `key: [a, b]`; carries the key and the trimmed, non-empty items.
    InlineListField(String, Vec<String>),
    /// `key:` with nothing but whitespace after the colon.
    FieldStart(String),
    /// `- item` or a bare `-`; carries the text after `- ` exactly as written
    /// (it is not trimmed).
    ListItem(String),
    /// Any other non-blank line; carries the trimmed text. Despite the name,
    /// this is also produced for lines with zero indentation.
    IndentedLine(String),
    /// The block-body introducer `body: |`.
    BodyMarker,
    /// `# expect: ...`; carries the trimmed text after the colon.
    TestExpectHeader(String),
}

/// A classified source line together with its position and original text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Line {
    /// What kind of line this is, including any extracted payload.
    pub kind: LineKind,
    /// Line number as passed to `classify_line` (`lex` numbers lines from 1).
    pub number: usize,
    /// Number of leading ASCII spaces; always 0 for `Blank` and
    /// `TestExpectHeader` lines.
    pub indent: usize,
    /// The line exactly as it was passed in.
    pub raw: String,
}
/// Returns the byte offset of the first tab character in `s`, or `None` when
/// `s` contains no tab.
fn find_tab(s: &str) -> Option<usize> {
    s.find('\t')
}
/// Counts the ASCII space characters at the start of `s`.
///
/// Only `' '` is counted; tabs and other whitespace end the run.
fn count_indent(s: &str) -> usize {
    // A space is one byte, so the length lost by stripping leading spaces is
    // exactly the number of spaces removed.
    let without_indent = s.trim_start_matches(' ');
    s.len() - without_indent.len()
}
/// Returns `true` when `key` is a valid field name: an ASCII letter or `_`
/// followed by any number of ASCII letters, ASCII digits, or `_`.
///
/// The empty string and any key containing a non-ASCII character are rejected.
fn is_valid_field_key(key: &str) -> bool {
    // Working on bytes is safe here: every byte of a multi-byte UTF-8
    // character is >= 0x80 and therefore fails the ASCII checks below.
    match key.as_bytes() {
        [] => false,
        [head, tail @ ..] => {
            let head_ok = head.is_ascii_alphabetic() || *head == b'_';
            head_ok
                && tail
                    .iter()
                    .all(|byte| byte.is_ascii_alphanumeric() || *byte == b'_')
        }
    }
}
/// Parses an inline list literal such as `[a, b, c]` into its items.
///
/// `value` must start with `[` and end with `]`. The text in between is split
/// on commas, each piece is trimmed, and empty pieces are dropped, so `[]`
/// and `[ , ]` both yield an empty list.
///
/// # Errors
///
/// Returns `ErrorCode::P007` located at `line_number` when `value` is not
/// enclosed in `[` … `]`. This includes inputs like `"]"` or `"x]"`, which are
/// rejected here instead of being sliced blindly.
fn parse_inline_list(value: &str, line_number: usize) -> Result<Vec<String>, AgmError> {
    // Strip both delimiters by pattern rather than by byte index. Index-based
    // slicing panics on "]" (range 1..0) and on a multi-byte first character.
    let inner = value
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .ok_or_else(|| {
            AgmError::new(
                ErrorCode::P007,
                "Invalid inline list syntax",
                ErrorLocation::new(None, Some(line_number), None),
            )
        })?;

    Ok(inner
        .split(',')
        .map(str::trim)
        .filter(|item| !item.is_empty())
        .map(str::to_string)
        .collect())
}
pub fn classify_line(raw: &str, line_number: usize) -> Result<Line, AgmError> {
if find_tab(raw).is_some() {
return Err(AgmError::new(
ErrorCode::P004,
"Tab character in indentation (spaces required)",
ErrorLocation::new(None, Some(line_number), None),
));
}
let trimmed = raw.trim();
let indent = count_indent(raw);
if trimmed.is_empty() {
return Ok(Line {
kind: LineKind::Blank,
number: line_number,
indent: 0,
raw: raw.to_string(),
});
}
if let Some(rest) = trimmed.strip_prefix("# expect:") {
return Ok(Line {
kind: LineKind::TestExpectHeader(rest.trim().to_string()),
number: line_number,
indent: 0,
raw: raw.to_string(),
});
}
if trimmed.starts_with('#') {
return Ok(Line {
kind: LineKind::Comment,
number: line_number,
indent,
raw: raw.to_string(),
});
}
if trimmed == "node" || trimmed.starts_with("node ") {
let id = if trimmed == "node" {
""
} else {
trimmed["node ".len()..].trim()
};
return Ok(Line {
kind: LineKind::NodeDeclaration(id.to_string()),
number: line_number,
indent,
raw: raw.to_string(),
});
}
if let Some(rest) = trimmed.strip_prefix("body:") {
if rest.trim() == "|" {
return Ok(Line {
kind: LineKind::BodyMarker,
number: line_number,
indent,
raw: raw.to_string(),
});
}
}
if let Some(colon_pos) = raw.find(':') {
let key_raw = &raw[..colon_pos];
let key = key_raw.trim();
let value_raw = &raw[colon_pos + 1..];
let value = value_raw.trim();
if is_valid_field_key(key) {
if value.starts_with('[') {
if !value.ends_with(']') {
return Err(AgmError::new(
ErrorCode::P007,
"Invalid inline list syntax",
ErrorLocation::new(None, Some(line_number), None),
));
}
let items = parse_inline_list(value, line_number)?;
return Ok(Line {
kind: LineKind::InlineListField(key.to_string(), items),
number: line_number,
indent,
raw: raw.to_string(),
});
}
if !value.is_empty() {
return Ok(Line {
kind: LineKind::ScalarField(key.to_string(), value.to_string()),
number: line_number,
indent,
raw: raw.to_string(),
});
}
return Ok(Line {
kind: LineKind::FieldStart(key.to_string()),
number: line_number,
indent,
raw: raw.to_string(),
});
}
}
let stripped = raw.trim_start_matches(' ');
if stripped.starts_with("- ") || stripped == "-" {
let value = if stripped == "-" {
""
} else {
&stripped["- ".len()..]
};
return Ok(Line {
kind: LineKind::ListItem(value.to_string()),
number: line_number,
indent,
raw: raw.to_string(),
});
}
if indent > 0 {
return Ok(Line {
kind: LineKind::IndentedLine(trimmed.to_string()),
number: line_number,
indent,
raw: raw.to_string(),
});
}
Ok(Line {
kind: LineKind::IndentedLine(trimmed.to_string()),
number: line_number,
indent: 0,
raw: raw.to_string(),
})
}
/// Splits `input` into lines and classifies each one with `classify_line`.
///
/// Lines are numbered starting at 1. Classification does not stop at the
/// first failure: every line is examined so that all problems can be reported
/// together.
///
/// # Errors
///
/// If any line fails to classify, returns every error collected, in line
/// order; the successfully classified lines are discarded in that case.
pub fn lex(input: &str) -> Result<Vec<Line>, Vec<AgmError>> {
    let mut classified = Vec::new();
    let mut failures = Vec::new();

    input
        .lines()
        .zip(1usize..)
        .for_each(|(text, number)| match classify_line(text, number) {
            Ok(line) => classified.push(line),
            Err(problem) => failures.push(problem),
        });

    if !failures.is_empty() {
        return Err(failures);
    }
    Ok(classified)
}
/// Unit tests for the line classifier and the `lex` driver.
///
/// Inputs that assert a specific `indent` carry exactly that many leading
/// spaces in the literal, so each assertion actually exercises the
/// indentation counting.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ErrorCode;

    // --- Blank lines -------------------------------------------------------

    #[test]
    fn test_classify_empty_string_returns_blank() {
        let line = classify_line("", 1).unwrap();
        assert_eq!(line.kind, LineKind::Blank);
        assert_eq!(line.indent, 0);
    }

    #[test]
    fn test_classify_spaces_only_returns_blank() {
        let line = classify_line("   ", 1).unwrap();
        assert_eq!(line.kind, LineKind::Blank);
        // Blank lines always report zero indent, whatever they contain.
        assert_eq!(line.indent, 0);
    }

    #[test]
    fn test_classify_single_space_returns_blank() {
        let line = classify_line(" ", 1).unwrap();
        assert_eq!(line.kind, LineKind::Blank);
        assert_eq!(line.indent, 0);
    }

    // --- Comments and expect headers ---------------------------------------

    #[test]
    fn test_classify_hash_comment_returns_comment() {
        let line = classify_line("# comment", 1).unwrap();
        assert_eq!(line.kind, LineKind::Comment);
    }

    #[test]
    fn test_classify_hash_only_returns_comment() {
        let line = classify_line("#", 1).unwrap();
        assert_eq!(line.kind, LineKind::Comment);
    }

    #[test]
    fn test_classify_indented_comment_returns_comment_with_indent() {
        let line = classify_line("  # indented comment", 1).unwrap();
        assert_eq!(line.kind, LineKind::Comment);
        assert_eq!(line.indent, 2);
    }

    #[test]
    fn test_classify_expect_header_with_content_returns_test_expect_header() {
        let line = classify_line("# expect: error AGM-P004", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::TestExpectHeader("error AGM-P004".to_string())
        );
    }

    #[test]
    fn test_classify_expect_header_empty_rest_returns_test_expect_header() {
        let line = classify_line("# expect:", 1).unwrap();
        assert_eq!(line.kind, LineKind::TestExpectHeader("".to_string()));
    }

    #[test]
    fn test_classify_expect_without_space_returns_comment_not_test_expect() {
        let line = classify_line("#expect: foo", 1).unwrap();
        assert_eq!(line.kind, LineKind::Comment);
    }

    // --- Node declarations -------------------------------------------------

    #[test]
    fn test_classify_node_with_id_returns_node_declaration() {
        let line = classify_line("node auth.login", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::NodeDeclaration("auth.login".to_string())
        );
    }

    #[test]
    fn test_classify_node_with_dotted_id_returns_node_declaration() {
        let line = classify_line("node billing.invoice.create", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::NodeDeclaration("billing.invoice.create".to_string())
        );
    }

    #[test]
    fn test_classify_node_alone_returns_node_declaration_empty_id() {
        let line = classify_line("node", 1).unwrap();
        assert_eq!(line.kind, LineKind::NodeDeclaration("".to_string()));
    }

    #[test]
    fn test_classify_node_with_extra_spaces_trims_id() {
        let line = classify_line("node   auth.login  ", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::NodeDeclaration("auth.login".to_string())
        );
    }

    // --- Body marker -------------------------------------------------------

    #[test]
    fn test_classify_body_pipe_returns_body_marker() {
        let line = classify_line("body: |", 1).unwrap();
        assert_eq!(line.kind, LineKind::BodyMarker);
    }

    #[test]
    fn test_classify_body_pipe_with_spaces_returns_body_marker() {
        let line = classify_line("body:   |  ", 1).unwrap();
        assert_eq!(line.kind, LineKind::BodyMarker);
    }

    #[test]
    fn test_classify_body_pipe_with_suffix_returns_scalar_field() {
        let line = classify_line("body: |something", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::ScalarField("body".to_string(), "|something".to_string())
        );
    }

    // --- Inline lists ------------------------------------------------------

    #[test]
    fn test_classify_inline_list_multiple_items_returns_inline_list_field() {
        let line = classify_line("tags: [auth, security]", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::InlineListField(
                "tags".to_string(),
                vec!["auth".to_string(), "security".to_string()]
            )
        );
    }

    #[test]
    fn test_classify_inline_list_single_item_returns_inline_list_field() {
        let line = classify_line("tags: [auth]", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::InlineListField("tags".to_string(), vec!["auth".to_string()])
        );
    }

    #[test]
    fn test_classify_inline_list_empty_returns_inline_list_field_empty() {
        let line = classify_line("tags: []", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::InlineListField("tags".to_string(), vec![])
        );
    }

    #[test]
    fn test_classify_inline_list_unclosed_returns_err_p007() {
        let err = classify_line("tags: [auth, security", 1).unwrap_err();
        assert_eq!(err.code, ErrorCode::P007);
    }

    // --- Scalar fields and field starts ------------------------------------

    #[test]
    fn test_classify_scalar_field_simple_returns_scalar_field() {
        let line = classify_line("type: workflow", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::ScalarField("type".to_string(), "workflow".to_string())
        );
    }

    #[test]
    fn test_classify_scalar_field_with_colon_in_value_keeps_rest() {
        let line = classify_line("summary: Rule: no tabs allowed", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::ScalarField("summary".to_string(), "Rule: no tabs allowed".to_string())
        );
    }

    #[test]
    fn test_classify_scalar_field_trims_value_whitespace() {
        let line = classify_line("type:   workflow  ", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::ScalarField("type".to_string(), "workflow".to_string())
        );
    }

    #[test]
    fn test_classify_field_start_no_value_returns_field_start() {
        let line = classify_line("items:", 1).unwrap();
        assert_eq!(line.kind, LineKind::FieldStart("items".to_string()));
    }

    #[test]
    fn test_classify_field_start_with_trailing_spaces_returns_field_start() {
        let line = classify_line("items:   ", 1).unwrap();
        assert_eq!(line.kind, LineKind::FieldStart("items".to_string()));
    }

    // --- List items --------------------------------------------------------

    #[test]
    fn test_classify_list_item_with_content_returns_list_item_with_indent() {
        let line = classify_line("  - first item", 1).unwrap();
        assert_eq!(line.kind, LineKind::ListItem("first item".to_string()));
        assert_eq!(line.indent, 2);
    }

    #[test]
    fn test_classify_list_item_dash_only_returns_list_item_empty() {
        let line = classify_line("  -", 1).unwrap();
        assert_eq!(line.kind, LineKind::ListItem("".to_string()));
        assert_eq!(line.indent, 2);
    }

    #[test]
    fn test_classify_list_item_no_space_after_dash_returns_indented_line() {
        let line = classify_line("  -value", 1).unwrap();
        assert_eq!(line.kind, LineKind::IndentedLine("-value".to_string()));
        assert_eq!(line.indent, 2);
    }

    #[test]
    fn test_classify_list_item_containing_colon_returns_list_item() {
        // "- key" is not a valid field key, so the field rule must not fire.
        let line = classify_line("  - key: value", 1).unwrap();
        assert_eq!(line.kind, LineKind::ListItem("key: value".to_string()));
        assert_eq!(line.indent, 2);
    }

    // --- Free text ---------------------------------------------------------

    #[test]
    fn test_classify_indented_text_returns_indented_line_with_indent() {
        let line = classify_line("  Some block text", 1).unwrap();
        assert_eq!(
            line.kind,
            LineKind::IndentedLine("Some block text".to_string())
        );
        assert_eq!(line.indent, 2);
    }

    #[test]
    fn test_classify_deeply_indented_text_returns_indented_line() {
        let line = classify_line("      deep text", 1).unwrap();
        assert_eq!(line.kind, LineKind::IndentedLine("deep text".to_string()));
        assert_eq!(line.indent, 6);
    }

    #[test]
    fn test_classify_unindented_text_returns_indented_line_with_zero_indent() {
        let line = classify_line("plain text", 1).unwrap();
        assert_eq!(line.kind, LineKind::IndentedLine("plain text".to_string()));
        assert_eq!(line.indent, 0);
    }

    // --- Tabs --------------------------------------------------------------

    #[test]
    fn test_classify_tab_at_start_returns_err_p004() {
        let err = classify_line("\ttype: workflow", 1).unwrap_err();
        assert_eq!(err.code, ErrorCode::P004);
    }

    #[test]
    fn test_classify_tab_in_middle_returns_err_p004() {
        let err = classify_line("type:\tworkflow", 1).unwrap_err();
        assert_eq!(err.code, ErrorCode::P004);
    }

    #[test]
    fn test_classify_tab_only_returns_err_p004() {
        let err = classify_line("\t", 1).unwrap_err();
        assert_eq!(err.code, ErrorCode::P004);
    }

    // --- lex ---------------------------------------------------------------

    #[test]
    fn test_lex_valid_snippet_returns_ok_with_correct_lines() {
        let input = "node auth.login\ntype: workflow\nsummary: Login flow\n";
        let lines = lex(input).unwrap();
        assert_eq!(lines.len(), 3);
        assert_eq!(
            lines[0].kind,
            LineKind::NodeDeclaration("auth.login".to_string())
        );
        assert_eq!(
            lines[1].kind,
            LineKind::ScalarField("type".to_string(), "workflow".to_string())
        );
        assert_eq!(
            lines[2].kind,
            LineKind::ScalarField("summary".to_string(), "Login flow".to_string())
        );
    }

    #[test]
    fn test_lex_numbers_lines_starting_at_one() {
        let lines = lex("node a\n\ntype: workflow\n").unwrap();
        let numbers: Vec<usize> = lines.iter().map(|line| line.number).collect();
        assert_eq!(numbers, vec![1, 2, 3]);
    }

    #[test]
    fn test_lex_two_tab_lines_returns_err_with_two_p004_errors() {
        let input = "\ttype: workflow\nsummary: ok\n\tversion: 1\n";
        let errors = lex(input).unwrap_err();
        assert_eq!(errors.len(), 2);
        assert!(errors.iter().all(|e| e.code == ErrorCode::P004));
    }

    #[test]
    fn test_lex_empty_input_returns_ok_empty_vec() {
        let lines = lex("").unwrap();
        assert!(lines.is_empty());
    }
}