Skip to main content

weave_content/
relationship.rs

1use std::collections::HashSet;
2
3use crate::parser::{ParseError, SourceEntry};
4
/// Maximum number of relationships a single file may declare.
///
/// `parse_relationships` records a `ParseError` when the parsed count exceeds
/// this limit.
const MAX_RELATIONSHIPS_PER_FILE: usize = 200;
7
/// All known relationship types, organized by category per ADR-014 §3.
///
/// Entries are the normalized spelling (lowercase, words joined by `_`) that
/// `parse_rel_line` produces, so lookups can be done with a plain `contains`.
const KNOWN_REL_TYPES: &[&str] = &[
    // Organizational
    "employed_by",
    "member_of",
    "leads",
    "founded",
    "owns",
    "subsidiary_of",
    // Legal/Criminal
    "charged_with",
    "convicted_of",
    "investigated_by",
    "prosecuted_by",
    "defended_by",
    "testified_in",
    "sentenced_to",
    "appealed",
    "acquitted_of",
    "pardoned_by",
    "arrested_by",
    // Financial
    "paid_to",
    "received_from",
    "funded_by",
    "awarded_contract",
    "approved_budget",
    "seized_from",
    // Governance
    "appointed_by",
    "approved_by",
    "regulated_by",
    "licensed_by",
    "lobbied",
    // Personal
    "family_of",
    "associate_of",
    // Temporal
    "preceded_by",
    // Document
    "documents",
    "authorizes",
    "references",
    // Case
    "part_of",
    "involved_in",
    // Source
    "sourced_by",
];
57
/// Known fields on relationships (nested bullets).
///
/// `id` and `source` are given dedicated handling by the parser; every other
/// key is validated and stored as a generic key/value pair on the relationship.
const REL_FIELDS: &[&str] = &[
    "id",
    "source",
    "description",
    "amount",
    "currency",
    "valid_from",
    "valid_until",
];
68
/// A parsed relationship.
///
/// One value is produced per top-level `- Source -> Target: type` bullet.
#[derive(Debug)]
#[allow(clippy::struct_field_names)]
pub struct Rel {
    /// Entity name on the left-hand side of `->`.
    pub source_name: String,
    /// Entity name on the right-hand side of `->`.
    pub target_name: String,
    /// Relationship type, lowercased with spaces replaced by `_`.
    pub rel_type: String,
    /// URLs from nested `- source:` bullets, or the default source URLs when
    /// the relationship declares none.
    pub source_urls: Vec<String>,
    /// Remaining nested `key: value` fields, in the order they appear.
    pub fields: Vec<(String, String)>,
    /// Stored NULID from `- id:` field (None if not yet generated).
    pub id: Option<String>,
    /// Line number (1-indexed) in the original file.
    pub line: usize,
}
83
84/// Parse relationships from the `## Relationships` section body.
85///
86/// `entity_names` is the set of entity names defined in the file (for resolution).
87/// `default_sources` are the front matter sources used when no `source:` override.
88#[allow(clippy::implicit_hasher)]
89#[allow(clippy::too_many_lines)]
90pub fn parse_relationships(
91    body: &str,
92    section_start_line: usize,
93    entity_names: &HashSet<&str>,
94    default_sources: &[SourceEntry],
95    errors: &mut Vec<ParseError>,
96) -> Vec<Rel> {
97    let lines: Vec<&str> = body.lines().collect();
98    let mut rels: Vec<Rel> = Vec::new();
99
100    // Current relationship being built
101    let mut current: Option<RelBuilder> = None;
102
103    for (i, line) in lines.iter().enumerate() {
104        let file_line = section_start_line + 1 + i;
105        let trimmed = line.trim();
106
107        // Top-level bullet: `- Source -> Target: type`
108        if trimmed.starts_with("- ") && !line.starts_with("  ") {
109            // Flush previous
110            if let Some(builder) = current.take() {
111                rels.push(builder.finish(default_sources));
112            }
113
114            let item = &trimmed[2..];
115            match parse_rel_line(item) {
116                Some((source, target, rel_type)) => {
117                    // Validate rel_type
118                    if !KNOWN_REL_TYPES.contains(&rel_type.as_str()) {
119                        errors.push(ParseError {
120                            line: file_line,
121                            message: format!(
122                                "unknown relationship type {rel_type:?} (known: {})",
123                                KNOWN_REL_TYPES.join(", ")
124                            ),
125                        });
126                    }
127
128                    // Validate entity names
129                    if !entity_names.contains(&source.as_str()) {
130                        errors.push(ParseError {
131                            line: file_line,
132                            message: format!(
133                                "entity {source:?} in relationship not defined in file"
134                            ),
135                        });
136                    }
137                    if !entity_names.contains(&target.as_str()) {
138                        errors.push(ParseError {
139                            line: file_line,
140                            message: format!(
141                                "entity {target:?} in relationship not defined in file"
142                            ),
143                        });
144                    }
145
146                    current = Some(RelBuilder {
147                        source_name: source,
148                        target_name: target,
149                        rel_type,
150                        source_urls: Vec::new(),
151                        fields: Vec::new(),
152                        id: None,
153                        line: file_line,
154                    });
155                }
156                None => {
157                    errors.push(ParseError {
158                        line: file_line,
159                        message: format!(
160                            "invalid relationship syntax: expected `- Source -> Target: type`, got {trimmed:?}"
161                        ),
162                    });
163                }
164            }
165            continue;
166        }
167
168        // Nested bullet: `  - key: value`
169        if line.starts_with("  - ") && current.is_some() {
170            let nested = trimmed.strip_prefix("- ").unwrap_or(trimmed);
171            if let Some((key, value)) = parse_kv(nested) {
172                if !REL_FIELDS.contains(&key.as_str()) {
173                    errors.push(ParseError {
174                        line: file_line,
175                        message: format!("unknown relationship field {key:?}"),
176                    });
177                    continue;
178                }
179
180                let builder = current.as_mut().unwrap_or_else(|| unreachable!());
181
182                if key == "id" {
183                    builder.id = Some(value);
184                } else if key == "source" {
185                    if !value.starts_with("https://") {
186                        errors.push(ParseError {
187                            line: file_line,
188                            message: format!("relationship source URL must be HTTPS: {value:?}"),
189                        });
190                    }
191                    builder.source_urls.push(value);
192                } else {
193                    // Validate field constraints
194                    validate_rel_field(&key, &value, file_line, errors);
195                    builder.fields.push((key, value));
196                }
197            } else {
198                errors.push(ParseError {
199                    line: file_line,
200                    message: format!(
201                        "invalid nested field syntax: expected `- key: value`, got {trimmed:?}"
202                    ),
203                });
204            }
205        }
206
207        // Ignore blank lines
208    }
209
210    // Flush last
211    if let Some(builder) = current.take() {
212        rels.push(builder.finish(default_sources));
213    }
214
215    // Boundary check
216    if rels.len() > MAX_RELATIONSHIPS_PER_FILE {
217        errors.push(ParseError {
218            line: section_start_line,
219            message: format!(
220                "too many relationships (max {MAX_RELATIONSHIPS_PER_FILE}, got {})",
221                rels.len()
222            ),
223        });
224    }
225
226    rels
227}
228
/// Accumulator for a relationship while its nested bullets are being read.
///
/// Mirrors the fields of `Rel`; `finish` converts it into the final value.
struct RelBuilder {
    /// Entity name on the left-hand side of `->`.
    source_name: String,
    /// Entity name on the right-hand side of `->`.
    target_name: String,
    /// Normalized relationship type.
    rel_type: String,
    /// URLs collected from nested `- source:` bullets (may be empty).
    source_urls: Vec<String>,
    /// Other nested `key: value` fields, in file order.
    fields: Vec<(String, String)>,
    /// Value of the nested `- id:` bullet, if one was seen.
    id: Option<String>,
    /// File line of the top-level bullet that opened this relationship.
    line: usize,
}
238
239impl RelBuilder {
240    fn finish(self, default_sources: &[SourceEntry]) -> Rel {
241        let source_urls = if self.source_urls.is_empty() {
242            default_sources
243                .iter()
244                .map(|s| s.url().to_string())
245                .collect()
246        } else {
247            self.source_urls
248        };
249
250        Rel {
251            source_name: self.source_name,
252            target_name: self.target_name,
253            rel_type: self.rel_type,
254            source_urls,
255            fields: self.fields,
256            id: self.id,
257            line: self.line,
258        }
259    }
260}
261
/// Parse `Source -> Target: type` from a relationship bullet.
///
/// The source ends at the first ` -> `; the type starts after the *last* `:`,
/// so a target name may itself contain colons or arrows. The type is trimmed,
/// lowercased, and has its spaces replaced by `_`.
///
/// Returns `None` when the arrow or colon is missing, or when any of the three
/// parts is empty after trimming.
fn parse_rel_line(item: &str) -> Option<(String, String, String)> {
    let (source, after_arrow) = item.split_once(" -> ")?;
    let (target, raw_type) = after_arrow.rsplit_once(':')?;

    let source = source.trim();
    let target = target.trim();
    let rel_type = raw_type.trim().to_lowercase().replace(' ', "_");

    if source.is_empty() || target.is_empty() || rel_type.is_empty() {
        return None;
    }

    Some((source.to_string(), target.to_string(), rel_type))
}
281
/// Split `key: value` at the first `:`, trimming both sides.
///
/// Splitting at the first colon keeps values that contain colons themselves
/// (such as URLs) intact. Returns `None` when there is no colon or the key is
/// empty; an empty value is allowed.
fn parse_kv(s: &str) -> Option<(String, String)> {
    let (key, value) = s.split_once(':')?;
    let key = key.trim();
    if key.is_empty() {
        return None;
    }
    Some((key.to_string(), value.trim().to_string()))
}
291
292fn validate_rel_field(key: &str, value: &str, line: usize, errors: &mut Vec<ParseError>) {
293    let max = match key {
294        "description" => 1000,
295        "amount" => 50,
296        "currency" | "valid_from" | "valid_until" => 10,
297        _ => return,
298    };
299
300    if value.len() > max {
301        errors.push(ParseError {
302            line,
303            message: format!(
304                "relationship field {key:?} exceeds {max} chars (got {})",
305                value.len()
306            ),
307        });
308    }
309
310    // Date format validation
311    if matches!(key, "valid_from" | "valid_until") && !value.is_empty() {
312        let valid = matches!(value.len(), 4 | 7 | 10)
313            && value.chars().enumerate().all(|(i, c)| match i {
314                4 | 7 => c == '-',
315                _ => c.is_ascii_digit(),
316            });
317        if !valid {
318            errors.push(ParseError {
319                line,
320                message: format!(
321                    "relationship field {key:?} must be YYYY, YYYY-MM, or YYYY-MM-DD, got {value:?}"
322                ),
323            });
324        }
325    }
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    // A bare relationship inherits the front matter sources.
    #[test]
    fn parse_basic_relationship() {
        let body = "\n- Alice -> Bob: employed_by\n";
        let names = HashSet::from(["Alice", "Bob"]);
        let sources = vec![SourceEntry::Url("https://example.com/src".into())];
        let mut errors = Vec::new();

        let rels = parse_relationships(body, 50, &names, &sources, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 1);
        assert_eq!(rels[0].source_name, "Alice");
        assert_eq!(rels[0].target_name, "Bob");
        assert_eq!(rels[0].rel_type, "employed_by");
        // Should default to front matter sources
        assert_eq!(rels[0].source_urls, vec!["https://example.com/src"]);
    }

    // A nested `source:` bullet replaces the front matter sources.
    #[test]
    fn parse_relationship_with_source_override() {
        let body = [
            "",
            "- Alice -> Bob: associate_of",
            "  - source: https://specific.com/article",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let sources = vec![SourceEntry::Url("https://default.com".into())];
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 10, &names, &sources, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls, vec!["https://specific.com/article"]);
    }

    // Generic fields are collected in order on the relationship.
    #[test]
    fn parse_relationship_with_fields() {
        let body = [
            "",
            "- Alice -> Corp: paid_to",
            "  - amount: EUR 50,000",
            "  - currency: EUR",
            "  - valid_from: 2020-01",
            "  - description: Campaign donation",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Corp"]);
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 10, &names, &[], &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].fields.len(), 4);
    }

    #[test]
    fn reject_unknown_rel_type() {
        let body = "\n- Alice -> Bob: best_friends\n";
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        parse_relationships(body, 1, &names, &[], &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("unknown relationship type"))
        );
    }

    #[test]
    fn reject_unresolved_entity() {
        let body = "\n- Alice -> Unknown: employed_by\n";
        let names = HashSet::from(["Alice"]);
        let mut errors = Vec::new();

        parse_relationships(body, 1, &names, &[], &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("not defined in file"))
        );
    }

    #[test]
    fn reject_non_https_source_override() {
        let body = [
            "",
            "- Alice -> Bob: associate_of",
            "  - source: http://insecure.com",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("HTTPS")));
    }

    #[test]
    fn reject_unknown_rel_field() {
        let body = ["", "- Alice -> Bob: associate_of", "  - foobar: value", ""].join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("unknown relationship field"))
        );
    }

    #[test]
    fn multiple_relationships() {
        let body = [
            "",
            "- Alice -> Bob: employed_by",
            "- Bob -> Corp: member_of",
            "- Corp -> Alice: charged_with",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob", "Corp"]);
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 3);
    }

    #[test]
    fn parse_rel_line_syntax() {
        let result = parse_rel_line("Mark Bonnick -> Arsenal FC: employed_by");
        assert_eq!(
            result,
            Some((
                "Mark Bonnick".into(),
                "Arsenal FC".into(),
                "employed_by".into()
            ))
        );
    }

    #[test]
    fn parse_rel_line_invalid() {
        assert!(parse_rel_line("not a relationship").is_none());
        assert!(parse_rel_line("-> Target: type").is_none());
        assert!(parse_rel_line("Source -> : type").is_none());
    }

    // The type is lowercased and spaces become underscores.
    #[test]
    fn parse_rel_line_normalizes_type() {
        let result = parse_rel_line("Alice -> Bob: Employed By");
        assert_eq!(
            result,
            Some(("Alice".into(), "Bob".into(), "employed_by".into()))
        );
    }

    // Only the first colon separates key from value, so URLs survive intact.
    #[test]
    fn parse_kv_splits_on_first_colon() {
        assert_eq!(
            parse_kv("source: https://example.com/a"),
            Some(("source".into(), "https://example.com/a".into()))
        );
        assert_eq!(parse_kv(": value"), None);
        assert_eq!(parse_kv("no colon"), None);
    }

    #[test]
    fn relationship_date_validation() {
        let body = [
            "",
            "- Alice -> Bob: associate_of",
            "  - valid_from: not-a-date",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("YYYY")));
    }

    #[test]
    fn multiple_source_overrides() {
        let body = [
            "",
            "- Alice -> Bob: associate_of",
            "  - source: https://first.com",
            "  - source: https://second.com",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls.len(), 2);
    }

    // Body line `i` (0-based) is reported as `section_start_line + 1 + i`.
    #[test]
    fn errors_report_file_line() {
        let body = "\n- Alice -> Bob: best_friends\n";
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        let rels = parse_relationships(body, 10, &names, &[], &mut errors);
        assert_eq!(errors.len(), 1, "errors: {errors:?}");
        assert_eq!(errors[0].line, 12);
        // The relationship is still returned despite the unknown type.
        assert_eq!(rels.len(), 1);
        assert_eq!(rels[0].line, 12);
    }
}