// weave_content/relationship.rs

use std::collections::HashSet;

use crate::parser::{ParseError, SourceEntry};

/// Maximum relationships per file.
///
/// `parse_relationships` reports a parse error when a section yields more
/// relationships than this.
const MAX_RELATIONSHIPS_PER_FILE: usize = 200;

/// All known relationship types, organized by category per ADR-014 §3.
///
/// Entries are lowercase with underscores, which is the form
/// `parse_rel_line` normalizes a relationship type to before lookup.
const KNOWN_REL_TYPES: &[&str] = &[
    // Organizational
    "employed_by",
    "member_of",
    "leads",
    "founded",
    "owns",
    "subsidiary_of",
    // Legal/Criminal
    "charged_with",
    "convicted_of",
    "investigated_by",
    "prosecuted_by",
    "defended_by",
    "testified_in",
    "sentenced_to",
    "appealed",
    "acquitted_of",
    "pardoned_by",
    "arrested_by",
    // Financial
    "paid_to",
    "received_from",
    "funded_by",
    "awarded_contract",
    "approved_budget",
    "seized_from",
    // Governance
    "appointed_by",
    "approved_by",
    "regulated_by",
    "licensed_by",
    "lobbied",
    // Personal
    "family_of",
    "associate_of",
    // Temporal
    "preceded_by",
    // Document
    "documents",
    "authorizes",
    "references",
    // Case
    "part_of",
    // Source
    "sourced_by",
];

/// Known fields on relationships (nested bullets).
///
/// Any other key on a nested `- key: value` bullet is reported as an
/// unknown relationship field. `id` and `source` are stored separately on
/// the relationship; the remaining keys are kept as generic fields.
const REL_FIELDS: &[&str] = &[
    "id",
    "source",
    "description",
    "amount",
    "currency",
    "valid_from",
    "valid_until",
];

/// A parsed relationship.
///
/// Built from one top-level `- Source -> Target: type` bullet plus any
/// nested `- key: value` bullets beneath it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(clippy::struct_field_names)]
pub struct Rel {
    /// Entity name written to the left of `->`.
    pub source_name: String,
    /// Entity name written between `->` and the final `:`.
    pub target_name: String,
    /// Relationship type, lowercased with spaces replaced by underscores.
    pub rel_type: String,
    /// URLs from nested `- source:` bullets, or the default sources when
    /// the relationship declares none of its own.
    pub source_urls: Vec<String>,
    /// Remaining nested `key: value` pairs (everything except `id` and
    /// `source`), in the order they appear.
    pub fields: Vec<(String, String)>,
    /// Stored NULID from `- id:` field (None if not yet generated).
    pub id: Option<String>,
    /// Line number (1-indexed) in the original file.
    pub line: usize,
}

83/// Parse relationships from the `## Relationships` section body.
84///
85/// `entity_names` is the set of entity names defined in the file (for resolution).
86/// `default_sources` are the front matter sources used when no `source:` override.
87#[allow(clippy::implicit_hasher)]
88#[allow(clippy::too_many_lines)]
89pub fn parse_relationships(
90    body: &str,
91    section_start_line: usize,
92    entity_names: &HashSet<&str>,
93    default_sources: &[SourceEntry],
94    errors: &mut Vec<ParseError>,
95) -> Vec<Rel> {
96    let lines: Vec<&str> = body.lines().collect();
97    let mut rels: Vec<Rel> = Vec::new();
98
99    // Current relationship being built
100    let mut current: Option<RelBuilder> = None;
101
102    for (i, line) in lines.iter().enumerate() {
103        let file_line = section_start_line + 1 + i;
104        let trimmed = line.trim();
105
106        // Top-level bullet: `- Source -> Target: type`
107        if trimmed.starts_with("- ") && !line.starts_with("  ") {
108            // Flush previous
109            if let Some(builder) = current.take() {
110                rels.push(builder.finish(default_sources));
111            }
112
113            let item = &trimmed[2..];
114            match parse_rel_line(item) {
115                Some((source, target, rel_type)) => {
116                    // Validate rel_type
117                    if !KNOWN_REL_TYPES.contains(&rel_type.as_str()) {
118                        errors.push(ParseError {
119                            line: file_line,
120                            message: format!(
121                                "unknown relationship type {rel_type:?} (known: {})",
122                                KNOWN_REL_TYPES.join(", ")
123                            ),
124                        });
125                    }
126
127                    // Validate entity names
128                    if !entity_names.contains(&source.as_str()) {
129                        errors.push(ParseError {
130                            line: file_line,
131                            message: format!(
132                                "entity {source:?} in relationship not defined in file"
133                            ),
134                        });
135                    }
136                    if !entity_names.contains(&target.as_str()) {
137                        errors.push(ParseError {
138                            line: file_line,
139                            message: format!(
140                                "entity {target:?} in relationship not defined in file"
141                            ),
142                        });
143                    }
144
145                    current = Some(RelBuilder {
146                        source_name: source,
147                        target_name: target,
148                        rel_type,
149                        source_urls: Vec::new(),
150                        fields: Vec::new(),
151                        id: None,
152                        line: file_line,
153                    });
154                }
155                None => {
156                    errors.push(ParseError {
157                        line: file_line,
158                        message: format!(
159                            "invalid relationship syntax: expected `- Source -> Target: type`, got {trimmed:?}"
160                        ),
161                    });
162                }
163            }
164            continue;
165        }
166
167        // Nested bullet: `  - key: value`
168        if line.starts_with("  - ") && current.is_some() {
169            let nested = trimmed.strip_prefix("- ").unwrap_or(trimmed);
170            if let Some((key, value)) = parse_kv(nested) {
171                if !REL_FIELDS.contains(&key.as_str()) {
172                    errors.push(ParseError {
173                        line: file_line,
174                        message: format!("unknown relationship field {key:?}"),
175                    });
176                    continue;
177                }
178
179                let builder = current.as_mut().unwrap_or_else(|| unreachable!());
180
181                if key == "id" {
182                    builder.id = Some(value);
183                } else if key == "source" {
184                    if !value.starts_with("https://") {
185                        errors.push(ParseError {
186                            line: file_line,
187                            message: format!("relationship source URL must be HTTPS: {value:?}"),
188                        });
189                    }
190                    builder.source_urls.push(value);
191                } else {
192                    // Validate field constraints
193                    validate_rel_field(&key, &value, file_line, errors);
194                    builder.fields.push((key, value));
195                }
196            } else {
197                errors.push(ParseError {
198                    line: file_line,
199                    message: format!(
200                        "invalid nested field syntax: expected `- key: value`, got {trimmed:?}"
201                    ),
202                });
203            }
204        }
205
206        // Ignore blank lines
207    }
208
209    // Flush last
210    if let Some(builder) = current.take() {
211        rels.push(builder.finish(default_sources));
212    }
213
214    // Boundary check
215    if rels.len() > MAX_RELATIONSHIPS_PER_FILE {
216        errors.push(ParseError {
217            line: section_start_line,
218            message: format!(
219                "too many relationships (max {MAX_RELATIONSHIPS_PER_FILE}, got {})",
220                rels.len()
221            ),
222        });
223    }
224
225    rels
226}
227
/// Accumulates one relationship while its nested bullets are being read.
///
/// Mirrors the fields of `Rel`; `source_urls` holds only the relationship's
/// own `source:` overrides.
#[derive(Debug)]
struct RelBuilder {
    source_name: String,
    target_name: String,
    rel_type: String,
    /// URLs from nested `- source:` bullets (empty if none were given).
    source_urls: Vec<String>,
    /// Nested `key: value` pairs other than `id` and `source`.
    fields: Vec<(String, String)>,
    id: Option<String>,
    /// File line of the top-level bullet.
    line: usize,
}

238impl RelBuilder {
239    fn finish(self, default_sources: &[SourceEntry]) -> Rel {
240        let source_urls = if self.source_urls.is_empty() {
241            default_sources
242                .iter()
243                .map(|s| s.url().to_string())
244                .collect()
245        } else {
246            self.source_urls
247        };
248
249        Rel {
250            source_name: self.source_name,
251            target_name: self.target_name,
252            rel_type: self.rel_type,
253            source_urls,
254            fields: self.fields,
255            id: self.id,
256            line: self.line,
257        }
258    }
259}
260
/// Parse `Source -> Target: type` from a relationship bullet.
///
/// The source ends at the first ` -> `; the type starts after the last
/// `:`, so the target may itself contain ` -> ` or `:`. Source and target
/// are trimmed; the type is trimmed, lowercased, and has spaces replaced by
/// underscores.
///
/// Returns `None` if the arrow or colon is missing, or if any of the three
/// parts is empty after trimming.
fn parse_rel_line(item: &str) -> Option<(String, String, String)> {
    let (source, rest) = item.split_once(" -> ")?;
    let (target, raw_type) = rest.rsplit_once(':')?;

    let source = source.trim();
    let target = target.trim();
    let rel_type = raw_type.trim().to_lowercase().replace(' ', "_");

    if source.is_empty() || target.is_empty() || rel_type.is_empty() {
        return None;
    }

    Some((source.to_owned(), target.to_owned(), rel_type))
}

/// Split `key: value` at the first `:` into trimmed owned strings.
///
/// The value may contain further colons (e.g. a URL) and may be empty.
/// Returns `None` when there is no colon or the key is empty after trimming.
fn parse_kv(s: &str) -> Option<(String, String)> {
    let (key, value) = s.split_once(':')?;
    let key = key.trim();
    if key.is_empty() {
        return None;
    }
    Some((key.to_owned(), value.trim().to_owned()))
}

291fn validate_rel_field(key: &str, value: &str, line: usize, errors: &mut Vec<ParseError>) {
292    let max = match key {
293        "description" => 1000,
294        "amount" => 50,
295        "currency" | "valid_from" | "valid_until" => 10,
296        _ => return,
297    };
298
299    if value.len() > max {
300        errors.push(ParseError {
301            line,
302            message: format!(
303                "relationship field {key:?} exceeds {max} chars (got {})",
304                value.len()
305            ),
306        });
307    }
308
309    // Date format validation
310    if matches!(key, "valid_from" | "valid_until") && !value.is_empty() {
311        let valid = matches!(value.len(), 4 | 7 | 10)
312            && value.chars().enumerate().all(|(i, c)| match i {
313                4 | 7 => c == '-',
314                _ => c.is_ascii_digit(),
315            });
316        if !valid {
317            errors.push(ParseError {
318                line,
319                message: format!(
320                    "relationship field {key:?} must be YYYY, YYYY-MM, or YYYY-MM-DD, got {value:?}"
321                ),
322            });
323        }
324    }
325}
326
/// Unit tests for relationship parsing and validation.
#[cfg(test)]
mod tests {
    use super::*;

    /// A single bullet parses and falls back to the front matter sources.
    #[test]
    fn parse_basic_relationship() {
        let body = "\n- Alice -> Bob: employed_by\n";
        let names = HashSet::from(["Alice", "Bob"]);
        let sources = vec![SourceEntry::Url("https://example.com/src".into())];
        let mut errors = Vec::new();

        let rels = parse_relationships(body, 50, &names, &sources, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 1);
        assert_eq!(rels[0].source_name, "Alice");
        assert_eq!(rels[0].target_name, "Bob");
        assert_eq!(rels[0].rel_type, "employed_by");
        // Should default to front matter sources
        assert_eq!(rels[0].source_urls, vec!["https://example.com/src"]);
    }

    /// A nested `source:` replaces the front matter sources.
    #[test]
    fn parse_relationship_with_source_override() {
        let body = [
            "",
            "- Alice -> Bob: associate_of",
            "  - source: https://specific.com/article",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let sources = vec![SourceEntry::Url("https://default.com".into())];
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 10, &names, &sources, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls, vec!["https://specific.com/article"]);
    }

    /// Known nested fields are collected on the relationship.
    #[test]
    fn parse_relationship_with_fields() {
        let body = [
            "",
            "- Alice -> Corp: paid_to",
            "  - amount: EUR 50,000",
            "  - currency: EUR",
            "  - valid_from: 2020-01",
            "  - description: Campaign donation",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Corp"]);
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 10, &names, &[], &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].fields.len(), 4);
    }

    #[test]
    fn reject_unknown_rel_type() {
        let body = "\n- Alice -> Bob: best_friends\n";
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        parse_relationships(body, 1, &names, &[], &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("unknown relationship type"))
        );
    }

    #[test]
    fn reject_unresolved_entity() {
        let body = "\n- Alice -> Unknown: employed_by\n";
        let names = HashSet::from(["Alice"]);
        let mut errors = Vec::new();

        parse_relationships(body, 1, &names, &[], &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("not defined in file"))
        );
    }

    #[test]
    fn reject_non_https_source_override() {
        let body = [
            "",
            "- Alice -> Bob: associate_of",
            "  - source: http://insecure.com",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("HTTPS")));
    }

    #[test]
    fn reject_unknown_rel_field() {
        let body = ["", "- Alice -> Bob: associate_of", "  - foobar: value", ""].join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("unknown relationship field"))
        );
    }

    #[test]
    fn multiple_relationships() {
        let body = [
            "",
            "- Alice -> Bob: employed_by",
            "- Bob -> Corp: member_of",
            "- Corp -> Alice: charged_with",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob", "Corp"]);
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 3);
    }

    #[test]
    fn parse_rel_line_syntax() {
        let result = parse_rel_line("Mark Bonnick -> Arsenal FC: employed_by");
        assert_eq!(
            result,
            Some((
                "Mark Bonnick".into(),
                "Arsenal FC".into(),
                "employed_by".into()
            ))
        );
    }

    #[test]
    fn parse_rel_line_invalid() {
        assert!(parse_rel_line("not a relationship").is_none());
        assert!(parse_rel_line("-> Target: type").is_none());
        assert!(parse_rel_line("Source -> : type").is_none());
    }

    #[test]
    fn relationship_date_validation() {
        let body = [
            "",
            "- Alice -> Bob: associate_of",
            "  - valid_from: not-a-date",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("YYYY")));
    }

    /// Repeated `source:` bullets accumulate rather than overwrite.
    #[test]
    fn multiple_source_overrides() {
        let body = [
            "",
            "- Alice -> Bob: associate_of",
            "  - source: https://first.com",
            "  - source: https://second.com",
            "",
        ]
        .join("\n");
        let names = HashSet::from(["Alice", "Bob"]);
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls.len(), 2);
    }
}