Skip to main content

weave_content/
relationship.rs

1use std::collections::HashSet;
2
3use crate::parser::{ParseError, SourceEntry};
4
/// Maximum relationships per file.
///
/// `parse_relationships` pushes a `ParseError` when a section yields more
/// relationships than this, but it still returns everything it parsed.
const MAX_RELATIONSHIPS_PER_FILE: usize = 200;
7
/// All known relationship types, organized by category per ADR-014 §3.
///
/// `parse_relationships` compares the type produced by `parse_rel_line`
/// (lower-cased, spaces replaced by underscores) against this list. A type
/// that is not listed is reported as an error, and the whole list is echoed
/// in that error message.
const KNOWN_REL_TYPES: &[&str] = &[
    // Organizational
    "employed_by",
    "member_of",
    "leads",
    "founded",
    "owns",
    "subsidiary_of",
    // Legal/Criminal
    "charged_with",
    "convicted_of",
    "investigated_by",
    "prosecuted_by",
    "defended_by",
    "testified_in",
    "sentenced_to",
    "appealed",
    "acquitted_of",
    "pardoned_by",
    "arrested_by",
    // Financial
    "paid_to",
    "received_from",
    "funded_by",
    "awarded_contract",
    "approved_budget",
    "seized_from",
    // Governance
    "appointed_by",
    "approved_by",
    "regulated_by",
    "licensed_by",
    "lobbied",
    // Personal
    "family_of",
    "associate_of",
    // Temporal
    "preceded_by",
    // Document
    "documents",
    "authorizes",
    "references",
    // Case
    "related_to",
    "part_of",
    "involved_in",
    // Source
    "sourced_by",
];
58
/// Field keys accepted on the indented `key: value` lines beneath a
/// relationship bullet.
///
/// `parse_relationships` reports any other key as an unknown field. `id` and
/// `source` are stored in dedicated slots of the relationship; the remaining
/// keys are validated by `validate_rel_field` and kept as `(key, value)` pairs.
const REL_FIELDS: &[&str] = &[
    "id",
    "source",
    "description",
    "amounts",
    "valid_from",
    "valid_until",
];
68
/// A parsed relationship between two entities.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so callers
/// and tests can copy relationships and compare them with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rel {
    /// Entity name written on the left of `->`.
    pub source_name: String,
    /// Entity name written on the right of `->`.
    pub target_name: String,
    /// Relationship type as written after the colon, lower-cased with spaces
    /// replaced by underscores.
    pub rel_type: String,
    /// URLs from the relationship's own `source:` lines, or the front matter
    /// source URLs when it declared none.
    pub source_urls: Vec<String>,
    /// Remaining `(key, value)` fields in the order they appeared
    /// (everything except `id` and `source`).
    pub fields: Vec<(String, String)>,
    /// Stored NULID from `id:` field (None if not yet generated).
    pub id: Option<String>,
    /// Line number (1-indexed) in the original file.
    pub line: usize,
}
82
83/// Parse relationships from the `## Relationships` section body.
84///
85/// `entity_names` is the set of entity names defined in the file (for resolution).
86/// `default_sources` are the front matter sources used when no `source:` override.
87#[allow(clippy::too_many_lines)]
88pub fn parse_relationships(
89    body: &str,
90    section_start_line: usize,
91    entity_names: &HashSet<&str>,
92    default_sources: &[SourceEntry],
93    errors: &mut Vec<ParseError>,
94) -> Vec<Rel> {
95    let lines: Vec<&str> = body.lines().collect();
96    let mut rels: Vec<Rel> = Vec::new();
97
98    // Current relationship being built
99    let mut current: Option<RelBuilder> = None;
100
101    for (i, line) in lines.iter().enumerate() {
102        let file_line = section_start_line + 1 + i;
103        let trimmed = line.trim();
104
105        // Top-level bullet: `- Source -> Target: type`
106        if trimmed.starts_with("- ") && !line.starts_with("  ") {
107            // Flush previous
108            if let Some(builder) = current.take() {
109                rels.push(builder.finish(default_sources));
110            }
111
112            let item = &trimmed[2..];
113            match parse_rel_line(item) {
114                Some((source, target, rel_type)) => {
115                    // Validate rel_type
116                    if !KNOWN_REL_TYPES.contains(&rel_type.as_str()) {
117                        errors.push(ParseError {
118                            line: file_line,
119                            message: format!(
120                                "unknown relationship type {rel_type:?} (known: {})",
121                                KNOWN_REL_TYPES.join(", ")
122                            ),
123                        });
124                    }
125
126                    // Validate entity names
127                    if !entity_names.contains(&source.as_str()) {
128                        errors.push(ParseError {
129                            line: file_line,
130                            message: format!(
131                                "entity {source:?} in relationship not defined in file"
132                            ),
133                        });
134                    }
135                    if !entity_names.contains(&target.as_str()) {
136                        errors.push(ParseError {
137                            line: file_line,
138                            message: format!(
139                                "entity {target:?} in relationship not defined in file"
140                            ),
141                        });
142                    }
143
144                    current = Some(RelBuilder {
145                        source_name: source,
146                        target_name: target,
147                        rel_type,
148                        source_urls: Vec::new(),
149                        fields: Vec::new(),
150                        id: None,
151                        line: file_line,
152                    });
153                }
154                None => {
155                    errors.push(ParseError {
156                        line: file_line,
157                        message: format!(
158                            "invalid relationship syntax: expected `- Source -> Target: type`, got {trimmed:?}"
159                        ),
160                    });
161                }
162            }
163            continue;
164        }
165
166        // Indented field: `  key: value`
167        if line.starts_with("  ") && current.is_some() {
168            if let Some((key, value)) = parse_kv(trimmed) {
169                if !REL_FIELDS.contains(&key.as_str()) {
170                    errors.push(ParseError {
171                        line: file_line,
172                        message: format!("unknown relationship field {key:?}"),
173                    });
174                    continue;
175                }
176
177                let builder = current.as_mut().unwrap_or_else(|| unreachable!());
178
179                if key == "id" {
180                    builder.id = Some(value);
181                } else if key == "source" {
182                    if !value.starts_with("https://") {
183                        errors.push(ParseError {
184                            line: file_line,
185                            message: format!("relationship source URL must be HTTPS: {value:?}"),
186                        });
187                    }
188                    builder.source_urls.push(value);
189                } else {
190                    // Validate field constraints
191                    validate_rel_field(&key, &value, file_line, errors);
192                    builder.fields.push((key, value));
193                }
194            } else {
195                errors.push(ParseError {
196                    line: file_line,
197                    message: format!(
198                        "invalid field syntax: expected `key: value`, got {trimmed:?}"
199                    ),
200                });
201            }
202        }
203
204        // Ignore blank lines
205    }
206
207    // Flush last
208    if let Some(builder) = current.take() {
209        rels.push(builder.finish(default_sources));
210    }
211
212    // Boundary check
213    if rels.len() > MAX_RELATIONSHIPS_PER_FILE {
214        errors.push(ParseError {
215            line: section_start_line,
216            message: format!(
217                "too many relationships (max {MAX_RELATIONSHIPS_PER_FILE}, got {})",
218                rels.len()
219            ),
220        });
221    }
222
223    rels
224}
225
/// Relationship under construction while `parse_relationships` walks the
/// section.
///
/// Carries the same fields as [`Rel`]. `source_urls` holds only the URLs from
/// the relationship's own `source:` lines; the front matter defaults are
/// applied in `finish`.
struct RelBuilder {
    source_name: String,
    target_name: String,
    rel_type: String,
    // Empty until a `source:` line is seen.
    source_urls: Vec<String>,
    // `(key, value)` pairs for every field other than `id` and `source`.
    fields: Vec<(String, String)>,
    id: Option<String>,
    // File line of the bullet that opened this relationship.
    line: usize,
}
235
236impl RelBuilder {
237    fn finish(self, default_sources: &[SourceEntry]) -> Rel {
238        let source_urls = if self.source_urls.is_empty() {
239            default_sources
240                .iter()
241                .map(|s| s.url().to_string())
242                .collect()
243        } else {
244            self.source_urls
245        };
246
247        Rel {
248            source_name: self.source_name,
249            target_name: self.target_name,
250            rel_type: self.rel_type,
251            source_urls,
252            fields: self.fields,
253            id: self.id,
254            line: self.line,
255        }
256    }
257}
258
/// Split a relationship bullet of the form `Source -> Target: type`.
///
/// The source ends at the first ` -> `; the type starts after the last `:`,
/// so a target may itself contain colons or arrows. The type is lower-cased
/// and its spaces become underscores. Returns `None` when the arrow or colon
/// is missing or when any of the three parts is empty after trimming.
fn parse_rel_line(item: &str) -> Option<(String, String, String)> {
    let (lhs, rhs) = item.split_once(" -> ")?;
    let (raw_target, raw_type) = rhs.rsplit_once(':')?;

    let source = lhs.trim();
    let target = raw_target.trim();
    let rel_type = raw_type.trim().to_lowercase().replace(' ', "_");

    let any_blank = [source, target, rel_type.as_str()]
        .iter()
        .any(|part| part.is_empty());
    if any_blank {
        return None;
    }

    Some((source.to_owned(), target.to_owned(), rel_type))
}
278
/// Split `key: value` at the first colon.
///
/// Both sides are trimmed. Returns `None` when there is no colon or the key is
/// empty; an empty value is allowed.
fn parse_kv(s: &str) -> Option<(String, String)> {
    let (raw_key, raw_value) = s.split_once(':')?;
    match raw_key.trim() {
        "" => None,
        key => Some((key.to_owned(), raw_value.trim().to_owned())),
    }
}
288
289fn validate_rel_field(key: &str, value: &str, line: usize, errors: &mut Vec<ParseError>) {
290    let max = match key {
291        "description" => 1000,
292        "amounts" => 200,
293        "valid_from" | "valid_until" => 10,
294        _ => return,
295    };
296
297    if value.len() > max {
298        errors.push(ParseError {
299            line,
300            message: format!(
301                "relationship field {key:?} exceeds {max} chars (got {})",
302                value.len()
303            ),
304        });
305    }
306
307    // Date format validation
308    if matches!(key, "valid_from" | "valid_until") && !value.is_empty() {
309        let valid = matches!(value.len(), 4 | 7 | 10)
310            && value.chars().enumerate().all(|(i, c)| match i {
311                4 | 7 => c == '-',
312                _ => c.is_ascii_digit(),
313            });
314        if !valid {
315            errors.push(ParseError {
316                line,
317                message: format!(
318                    "relationship field {key:?} must be YYYY, YYYY-MM, or YYYY-MM-DD, got {value:?}"
319                ),
320            });
321        }
322    }
323}
324
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `parse_relationships` and return the relationships together with
    /// the errors it collected.
    fn run(
        body: &str,
        start_line: usize,
        names: &[&str],
        sources: &[SourceEntry],
    ) -> (Vec<Rel>, Vec<ParseError>) {
        let entity_names: HashSet<&str> = names.iter().copied().collect();
        let mut errors = Vec::new();
        let rels = parse_relationships(body, start_line, &entity_names, sources, &mut errors);
        (rels, errors)
    }

    /// True when any collected error message contains `needle`.
    fn has_error(errors: &[ParseError], needle: &str) -> bool {
        errors.iter().any(|e| e.message.contains(needle))
    }

    #[test]
    fn parse_basic_relationship() {
        let sources = vec![SourceEntry::Url("https://example.com/src".into())];

        let (rels, errors) = run(
            "\n- Alice -> Bob: employed_by\n",
            50,
            &["Alice", "Bob"],
            &sources,
        );

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 1);
        let rel = &rels[0];
        assert_eq!(rel.source_name, "Alice");
        assert_eq!(rel.target_name, "Bob");
        assert_eq!(rel.rel_type, "employed_by");
        // With no `source:` line the front matter sources apply.
        assert_eq!(rel.source_urls, vec!["https://example.com/src"]);
    }

    #[test]
    fn parse_relationship_with_source_override() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: associate_of\n",
            "  source: https://specific.com/article\n",
        );
        let sources = vec![SourceEntry::Url("https://default.com".into())];

        let (rels, errors) = run(body, 10, &["Alice", "Bob"], &sources);

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls, vec!["https://specific.com/article"]);
    }

    #[test]
    fn parse_relationship_with_fields() {
        let body = concat!(
            "\n",
            "- Alice -> Corp: paid_to\n",
            "  amounts: 50000 EUR\n",
            "  valid_from: 2020-01\n",
            "  description: Campaign donation\n",
        );

        let (rels, errors) = run(body, 10, &["Alice", "Corp"], &[]);

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].fields.len(), 3);
    }

    #[test]
    fn reject_unknown_rel_type() {
        let (_, errors) = run("\n- Alice -> Bob: best_friends\n", 1, &["Alice", "Bob"], &[]);

        assert!(has_error(&errors, "unknown relationship type"));
    }

    #[test]
    fn reject_unresolved_entity() {
        let (_, errors) = run("\n- Alice -> Unknown: employed_by\n", 1, &["Alice"], &[]);

        assert!(has_error(&errors, "not defined in file"));
    }

    #[test]
    fn reject_non_https_source_override() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: associate_of\n",
            "  source: http://insecure.com\n",
        );

        let (_, errors) = run(body, 1, &["Alice", "Bob"], &[]);

        assert!(has_error(&errors, "HTTPS"));
    }

    #[test]
    fn reject_unknown_rel_field() {
        let body = concat!("\n", "- Alice -> Bob: associate_of\n", "  foobar: value\n");

        let (_, errors) = run(body, 1, &["Alice", "Bob"], &[]);

        assert!(has_error(&errors, "unknown relationship field"));
    }

    #[test]
    fn multiple_relationships() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: employed_by\n",
            "- Bob -> Corp: member_of\n",
            "- Corp -> Alice: charged_with\n",
        );

        let (rels, errors) = run(body, 1, &["Alice", "Bob", "Corp"], &[]);

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 3);
    }

    #[test]
    fn parse_rel_line_syntax() {
        let expected = (
            String::from("Mark Bonnick"),
            String::from("Arsenal FC"),
            String::from("employed_by"),
        );

        assert_eq!(
            parse_rel_line("Mark Bonnick -> Arsenal FC: employed_by"),
            Some(expected)
        );
    }

    #[test]
    fn parse_rel_line_invalid() {
        for input in ["not a relationship", "-> Target: type", "Source -> : type"] {
            assert!(parse_rel_line(input).is_none());
        }
    }

    #[test]
    fn relationship_date_validation() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: associate_of\n",
            "  valid_from: not-a-date\n",
        );

        let (_, errors) = run(body, 1, &["Alice", "Bob"], &[]);

        assert!(has_error(&errors, "YYYY"));
    }

    #[test]
    fn multiple_source_overrides() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: associate_of\n",
            "  source: https://first.com\n",
            "  source: https://second.com\n",
        );

        let (rels, errors) = run(body, 1, &["Alice", "Bob"], &[]);

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls.len(), 2);
    }
}
512}