weave-content 0.2.8

Content DSL parser, validator, and builder for OSINT case files
Documentation
use crate::parser::ParseError;
use crate::relationship::Rel;

/// Maximum timeline chains per file.
///
/// `parse_timeline` pushes a single error, attributed to the section's start
/// line, when more chains than this are accepted from one section body.
const MAX_CHAINS: usize = 10;

/// Maximum events per chain.
///
/// `parse_timeline` pushes an error for any chain line that lists more events
/// than this, and skips that line when generating relationships.
const MAX_EVENTS_PER_CHAIN: usize = 20;

/// Turn the body of a `## Timeline` section into `next` relationships.
///
/// Every non-blank line of `body` is read as one chain of event names joined
/// by ` -> `, for example `Event A -> Event B -> Event C`. Each adjacent pair
/// in an accepted chain becomes one [`Rel`] whose `rel_type` is `"next"`,
/// pointing from the earlier name to the later one.
///
/// # Arguments
///
/// * `body` - text of the section; the line at index `i` is reported as file
///   line `section_start_line + 1 + i`.
/// * `section_start_line` - line number used for section-level errors and as
///   the base for per-line numbers.
/// * `event_names` - entity names from the `## Events` section that a chain
///   is allowed to reference.
/// * `errors` - sink that receives every problem found; nothing is returned
///   as an `Err`.
///
/// # Reported problems
///
/// * A line with fewer than 2 events, or more than `MAX_EVENTS_PER_CHAIN`
///   events, is reported and skipped.
/// * A name missing from `event_names` is reported (once per occurrence), but
///   the chain is still accepted.
/// * More than `MAX_CHAINS` accepted chains is reported once against
///   `section_start_line`; relationships are still produced for all of them.
pub fn parse_timeline(
    body: &str,
    section_start_line: usize,
    event_names: &[&str],
    errors: &mut Vec<ParseError>,
) -> Vec<Rel> {
    // Accepted chains, each paired with the file line it was read from.
    let mut accepted: Vec<(usize, Vec<&str>)> = Vec::new();

    for (offset, raw) in body.lines().enumerate() {
        let line_no = section_start_line + 1 + offset;
        let text = raw.trim();
        if text.is_empty() {
            continue;
        }

        let names: Vec<&str> = text.split(" -> ").map(str::trim).collect();

        // Length checks: a rejected line contributes no chain at all.
        match names.len() {
            0 | 1 => {
                errors.push(ParseError {
                    line: line_no,
                    message: format!(
                        "timeline chain must have at least 2 events (got {text:?})"
                    ),
                });
                continue;
            }
            count if count > MAX_EVENTS_PER_CHAIN => {
                errors.push(ParseError {
                    line: line_no,
                    message: format!(
                        "timeline chain exceeds {MAX_EVENTS_PER_CHAIN} events (got {count})"
                    ),
                });
                continue;
            }
            _ => {}
        }

        // Unknown names are reported, but the chain is kept regardless.
        errors.extend(
            names
                .iter()
                .filter(|name| !event_names.contains(*name))
                .map(|name| ParseError {
                    line: line_no,
                    message: format!("timeline entity {name:?} not found in Events section"),
                }),
        );

        accepted.push((line_no, names));
    }

    if accepted.len() > MAX_CHAINS {
        errors.push(ParseError {
            line: section_start_line,
            message: format!(
                "too many timeline chains (max {MAX_CHAINS}, got {})",
                accepted.len()
            ),
        });
    }

    // One NEXT relationship per adjacent pair, in chain order.
    accepted
        .iter()
        .flat_map(|(line_no, names)| {
            names.windows(2).map(move |pair| Rel {
                source_name: pair[0].to_owned(),
                target_name: pair[1].to_owned(),
                rel_type: String::from("next"),
                source_urls: Vec::new(),
                fields: Vec::new(),
                id: None,
                line: *line_no,
            })
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `body` as a section starting at line 1 and returns only the
    /// messages of the errors that were recorded.
    fn error_messages(body: &str, names: &[&str]) -> Vec<String> {
        let mut errors = Vec::new();
        parse_timeline(body, 1, names, &mut errors);
        errors.into_iter().map(|e| e.message).collect()
    }

    /// A three-event chain yields two consecutive `next` relationships.
    #[test]
    fn parse_single_chain() {
        let known = ["Event A", "Event B", "Event C"];
        let text = "\nEvent A -> Event B -> Event C\n";
        let mut errors = Vec::new();

        let rels = parse_timeline(text, 80, &known, &mut errors);

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 2);

        let (first, second) = (&rels[0], &rels[1]);
        assert_eq!(first.source_name, "Event A");
        assert_eq!(first.target_name, "Event B");
        assert_eq!(first.rel_type, "next");
        assert!(first.source_urls.is_empty());
        assert_eq!(second.source_name, "Event B");
        assert_eq!(second.target_name, "Event C");
    }

    /// Separate lines are independent chains; no link is made across lines.
    #[test]
    fn parse_multiple_chains() {
        let known = ["A", "B", "C", "D", "E"];
        let text = "\nA -> B -> C\nD -> E\n";
        let mut errors = Vec::new();

        let rels = parse_timeline(text, 1, &known, &mut errors);

        assert!(errors.is_empty(), "errors: {errors:?}");
        // A->B, B->C, D->E
        assert_eq!(rels.len(), 3);
    }

    /// A line without any ` -> ` separator is not a valid chain.
    #[test]
    fn reject_single_event() {
        let messages = error_messages("\nJust One Event\n", &[]);
        assert!(messages.iter().any(|m| m.contains("at least 2")));
    }

    /// Names absent from the Events list are reported.
    #[test]
    fn reject_unknown_event() {
        let messages = error_messages("\nKnown -> Unknown\n", &["Known"]);
        assert!(messages.iter().any(|m| m.contains("not found in Events")));
    }

    /// Eleven valid chains exceed the per-file chain limit.
    #[test]
    fn reject_too_many_chains() {
        let pairs: Vec<(String, String)> = (0..11)
            .map(|i| (format!("E{i}a"), format!("E{i}b")))
            .collect();

        let mut text = String::from("\n");
        for (from, to) in &pairs {
            text.push_str(from);
            text.push_str(" -> ");
            text.push_str(to);
            text.push('\n');
        }

        let known: Vec<&str> = pairs
            .iter()
            .flat_map(|(from, to)| [from.as_str(), to.as_str()])
            .collect();

        let messages = error_messages(&text, &known);
        assert!(messages.iter().any(|m| m.contains("too many timeline chains")));
    }

    /// A single chain of twenty-one events exceeds the per-chain limit.
    #[test]
    fn reject_too_many_events_per_chain() {
        let owned: Vec<String> = (0..21).map(|i| format!("E{i}")).collect();
        let known: Vec<&str> = owned.iter().map(String::as_str).collect();
        let text = format!("\n{}\n", known.join(" -> "));

        let messages = error_messages(&text, &known);
        assert!(messages.iter().any(|m| m.contains("exceeds 20")));
    }

    /// A body of only blank lines produces neither relationships nor errors.
    #[test]
    fn empty_timeline() {
        let mut errors = Vec::new();

        let rels = parse_timeline("\n\n\n", 1, &[], &mut errors);

        assert!(errors.is_empty());
        assert!(rels.is_empty());
    }
}