Skip to main content

weave_content/
relationship.rs

1use crate::parser::ParseError;
2
/// Upper bound on the number of relationships one file may declare.
///
/// `parse_relationships` reports a parse error when the parsed count is
/// strictly greater than this value.
const MAX_RELATIONSHIPS_PER_FILE: usize = 200;
5
/// The 20 relationship types accepted after the colon in
/// `- Source -> Target: type`.
///
/// Entries are snake_case and listed alphabetically. The whole list is echoed
/// (comma-separated, in this order) in the "unknown relationship type" error.
const KNOWN_REL_TYPES: &[&str] = &[
    "affiliated_with",
    "appointed_by",
    "charged_in",
    "contracted_with",
    "convicted_in",
    "donated_to",
    "employed_by",
    "endorsed_by",
    "family_of",
    "funded_by",
    "investigated_by",
    "lobbied_for",
    "member_of",
    "next",
    "owns",
    "related_to",
    "represented_by",
    "sponsored_by",
    "sued_by",
    "transferred_to",
];
29
/// Keys allowed on the nested `  - key: value` bullets under a relationship.
///
/// Any other key is rejected with an "unknown relationship field" error.
const REL_FIELDS: &[&str] = &[
    "id",
    "source",
    "description",
    "amount",
    "currency",
    "effective_date",
    "expiry_date",
];
40
/// One relationship parsed from a `- Source -> Target: type` bullet together
/// with its nested `- key: value` fields.
#[derive(Debug)]
#[allow(clippy::struct_field_names)]
pub struct Rel {
    /// Entity name written to the left of the ` -> ` arrow.
    pub source_name: String,
    /// Entity name written between the arrow and the last `:`.
    pub target_name: String,
    /// Relationship type, lower-cased with spaces replaced by underscores.
    pub rel_type: String,
    /// URLs given via nested `source:` fields, or the caller-supplied default
    /// sources when the relationship has none of its own.
    pub source_urls: Vec<String>,
    /// Remaining nested fields as `(key, value)` pairs, in file order
    /// (`id` and `source` are stored separately and never appear here).
    pub fields: Vec<(String, String)>,
    /// Stored NULID from the `- id:` field (`None` if not yet generated).
    pub id: Option<String>,
    /// 1-indexed line number of the relationship bullet in the original file.
    pub line: usize,
}
55
56/// Parse relationships from the `## Relationships` section body.
57///
58/// `entity_names` is the set of entity names defined in the file (for resolution).
59/// `default_sources` are the front matter sources used when no `source:` override.
60#[allow(clippy::too_many_lines)]
61pub fn parse_relationships(
62    body: &str,
63    section_start_line: usize,
64    entity_names: &[&str],
65    default_sources: &[String],
66    errors: &mut Vec<ParseError>,
67) -> Vec<Rel> {
68    let lines: Vec<&str> = body.lines().collect();
69    let mut rels: Vec<Rel> = Vec::new();
70
71    // Current relationship being built
72    let mut current: Option<RelBuilder> = None;
73
74    for (i, line) in lines.iter().enumerate() {
75        let file_line = section_start_line + 1 + i;
76        let trimmed = line.trim();
77
78        // Top-level bullet: `- Source -> Target: type`
79        if trimmed.starts_with("- ") && !line.starts_with("  ") {
80            // Flush previous
81            if let Some(builder) = current.take() {
82                rels.push(builder.finish(default_sources));
83            }
84
85            let item = &trimmed[2..];
86            match parse_rel_line(item) {
87                Some((source, target, rel_type)) => {
88                    // Validate rel_type
89                    if !KNOWN_REL_TYPES.contains(&rel_type.as_str()) {
90                        errors.push(ParseError {
91                            line: file_line,
92                            message: format!(
93                                "unknown relationship type {rel_type:?} (known: {})",
94                                KNOWN_REL_TYPES.join(", ")
95                            ),
96                        });
97                    }
98
99                    // Validate entity names
100                    if !entity_names.contains(&source.as_str()) {
101                        errors.push(ParseError {
102                            line: file_line,
103                            message: format!(
104                                "entity {source:?} in relationship not defined in file"
105                            ),
106                        });
107                    }
108                    if !entity_names.contains(&target.as_str()) {
109                        errors.push(ParseError {
110                            line: file_line,
111                            message: format!(
112                                "entity {target:?} in relationship not defined in file"
113                            ),
114                        });
115                    }
116
117                    current = Some(RelBuilder {
118                        source_name: source,
119                        target_name: target,
120                        rel_type,
121                        source_urls: Vec::new(),
122                        fields: Vec::new(),
123                        id: None,
124                        line: file_line,
125                    });
126                }
127                None => {
128                    errors.push(ParseError {
129                        line: file_line,
130                        message: format!(
131                            "invalid relationship syntax: expected `- Source -> Target: type`, got {trimmed:?}"
132                        ),
133                    });
134                }
135            }
136            continue;
137        }
138
139        // Nested bullet: `  - key: value`
140        if line.starts_with("  - ") && current.is_some() {
141            let nested = trimmed.strip_prefix("- ").unwrap_or(trimmed);
142            if let Some((key, value)) = parse_kv(nested) {
143                if !REL_FIELDS.contains(&key.as_str()) {
144                    errors.push(ParseError {
145                        line: file_line,
146                        message: format!("unknown relationship field {key:?}"),
147                    });
148                    continue;
149                }
150
151                let builder = current.as_mut().unwrap_or_else(|| unreachable!());
152
153                if key == "id" {
154                    builder.id = Some(value);
155                } else if key == "source" {
156                    if !value.starts_with("https://") {
157                        errors.push(ParseError {
158                            line: file_line,
159                            message: format!("relationship source URL must be HTTPS: {value:?}"),
160                        });
161                    }
162                    builder.source_urls.push(value);
163                } else {
164                    // Validate field constraints
165                    validate_rel_field(&key, &value, file_line, errors);
166                    builder.fields.push((key, value));
167                }
168            } else {
169                errors.push(ParseError {
170                    line: file_line,
171                    message: format!(
172                        "invalid nested field syntax: expected `- key: value`, got {trimmed:?}"
173                    ),
174                });
175            }
176        }
177
178        // Ignore blank lines
179    }
180
181    // Flush last
182    if let Some(builder) = current.take() {
183        rels.push(builder.finish(default_sources));
184    }
185
186    // Boundary check
187    if rels.len() > MAX_RELATIONSHIPS_PER_FILE {
188        errors.push(ParseError {
189            line: section_start_line,
190            message: format!(
191                "too many relationships (max {MAX_RELATIONSHIPS_PER_FILE}, got {})",
192                rels.len()
193            ),
194        });
195    }
196
197    rels
198}
199
/// Accumulates one relationship while its nested bullets are being read.
///
/// Mirrors [`Rel`] field for field; the difference is that `source_urls` may
/// still be empty here, before the default sources are filled in.
struct RelBuilder {
    /// Entity name left of the arrow.
    source_name: String,
    /// Entity name right of the arrow.
    target_name: String,
    /// Normalised relationship type.
    rel_type: String,
    /// `source:` overrides collected so far (possibly none).
    source_urls: Vec<String>,
    /// Other nested `(key, value)` fields in file order.
    fields: Vec<(String, String)>,
    /// Value of the nested `id:` field, if one was seen.
    id: Option<String>,
    /// File line of the relationship bullet.
    line: usize,
}
209
210impl RelBuilder {
211    fn finish(self, default_sources: &[String]) -> Rel {
212        let source_urls = if self.source_urls.is_empty() {
213            default_sources.to_vec()
214        } else {
215            self.source_urls
216        };
217
218        Rel {
219            source_name: self.source_name,
220            target_name: self.target_name,
221            rel_type: self.rel_type,
222            source_urls,
223            fields: self.fields,
224            id: self.id,
225            line: self.line,
226        }
227    }
228}
229
/// Split a relationship bullet of the form `Source -> Target: type`.
///
/// The source is everything before the first ` -> `; the type is everything
/// after the last `:` that follows the arrow; the target is what lies between.
/// Source and target are trimmed. The type is trimmed, lower-cased, and has its
/// spaces replaced by underscores.
///
/// Returns `None` when the arrow or the colon is missing, or when any of the
/// three parts is empty after trimming.
fn parse_rel_line(item: &str) -> Option<(String, String, String)> {
    let (lhs, rhs) = item.split_once(" -> ")?;
    let (target_part, type_part) = rhs.rsplit_once(':')?;

    let source = lhs.trim();
    let target = target_part.trim();
    let rel_type = type_part.trim().to_lowercase().replace(' ', "_");

    let any_empty = [source, target, rel_type.as_str()]
        .iter()
        .any(|part| part.is_empty());
    if any_empty {
        return None;
    }

    Some((source.to_owned(), target.to_owned(), rel_type))
}
249
/// Split `key: value` at the first `:` and trim both halves.
///
/// Returns `None` when there is no colon or the key is empty after trimming.
/// An empty value is allowed and comes back as an empty string.
fn parse_kv(s: &str) -> Option<(String, String)> {
    let (raw_key, raw_value) = s.split_once(':')?;
    match raw_key.trim() {
        "" => None,
        key => Some((key.to_owned(), raw_value.trim().to_owned())),
    }
}
259
260fn validate_rel_field(key: &str, value: &str, line: usize, errors: &mut Vec<ParseError>) {
261    let max = match key {
262        "description" => 1000,
263        "amount" => 50,
264        "currency" | "effective_date" | "expiry_date" => 10,
265        _ => return,
266    };
267
268    if value.len() > max {
269        errors.push(ParseError {
270            line,
271            message: format!(
272                "relationship field {key:?} exceeds {max} chars (got {})",
273                value.len()
274            ),
275        });
276    }
277
278    // Date format validation
279    if matches!(key, "effective_date" | "expiry_date") && !value.is_empty() {
280        let valid = matches!(value.len(), 4 | 7 | 10)
281            && value.chars().enumerate().all(|(i, c)| match i {
282                4 | 7 => c == '-',
283                _ => c.is_ascii_digit(),
284            });
285        if !valid {
286            errors.push(ParseError {
287                line,
288                message: format!(
289                    "relationship field {key:?} must be YYYY, YYYY-MM, or YYYY-MM-DD, got {value:?}"
290                ),
291            });
292        }
293    }
294}
295
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `parse_relationships` with a fresh error list and return both the
    /// parsed relationships and the collected errors.
    fn run(
        body: &str,
        start_line: usize,
        names: &[&str],
        sources: &[String],
    ) -> (Vec<Rel>, Vec<ParseError>) {
        let mut errors = Vec::new();
        let rels = parse_relationships(body, start_line, names, sources, &mut errors);
        (rels, errors)
    }

    /// True when at least one error message contains `needle`.
    fn mentions(errors: &[ParseError], needle: &str) -> bool {
        errors.iter().any(|err| err.message.contains(needle))
    }

    #[test]
    fn parse_basic_relationship() {
        let defaults = ["https://example.com/src".to_string()];
        let (rels, errors) = run(
            "\n- Alice -> Bob: employed_by\n",
            50,
            &["Alice", "Bob"],
            &defaults,
        );

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 1);
        let rel = &rels[0];
        assert_eq!(rel.source_name, "Alice");
        assert_eq!(rel.target_name, "Bob");
        assert_eq!(rel.rel_type, "employed_by");
        // No override given, so the front matter sources apply.
        assert_eq!(rel.source_urls, vec!["https://example.com/src"]);
    }

    #[test]
    fn parse_relationship_with_source_override() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: related_to\n",
            "  - source: https://specific.com/article\n",
        );
        let defaults = ["https://default.com".to_string()];
        let (rels, errors) = run(body, 10, &["Alice", "Bob"], &defaults);

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls, vec!["https://specific.com/article"]);
    }

    #[test]
    fn parse_relationship_with_fields() {
        let body = concat!(
            "\n",
            "- Alice -> Corp: donated_to\n",
            "  - amount: EUR 50,000\n",
            "  - currency: EUR\n",
            "  - effective_date: 2020-01\n",
            "  - description: Campaign donation\n",
        );
        let (rels, errors) = run(body, 10, &["Alice", "Corp"], &[]);

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].fields.len(), 4);
    }

    #[test]
    fn reject_unknown_rel_type() {
        let (_, errors) = run("\n- Alice -> Bob: best_friends\n", 1, &["Alice", "Bob"], &[]);
        assert!(mentions(&errors, "unknown relationship type"));
    }

    #[test]
    fn reject_unresolved_entity() {
        let (_, errors) = run("\n- Alice -> Unknown: employed_by\n", 1, &["Alice"], &[]);
        assert!(mentions(&errors, "not defined in file"));
    }

    #[test]
    fn reject_non_https_source_override() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: related_to\n",
            "  - source: http://insecure.com\n",
        );
        let (_, errors) = run(body, 1, &["Alice", "Bob"], &[]);
        assert!(mentions(&errors, "HTTPS"));
    }

    #[test]
    fn reject_unknown_rel_field() {
        let body = concat!("\n", "- Alice -> Bob: related_to\n", "  - foobar: value\n");
        let (_, errors) = run(body, 1, &["Alice", "Bob"], &[]);
        assert!(mentions(&errors, "unknown relationship field"));
    }

    #[test]
    fn multiple_relationships() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: employed_by\n",
            "- Bob -> Corp: member_of\n",
            "- Corp -> Alice: sued_by\n",
        );
        let (rels, errors) = run(body, 1, &["Alice", "Bob", "Corp"], &[]);

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 3);
    }

    #[test]
    fn parse_rel_line_syntax() {
        let expected = (
            String::from("Mark Bonnick"),
            String::from("Arsenal FC"),
            String::from("employed_by"),
        );
        assert_eq!(
            parse_rel_line("Mark Bonnick -> Arsenal FC: employed_by"),
            Some(expected)
        );
    }

    #[test]
    fn parse_rel_line_invalid() {
        for bad in ["not a relationship", "-> Target: type", "Source -> : type"] {
            assert!(parse_rel_line(bad).is_none());
        }
    }

    #[test]
    fn relationship_date_validation() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: related_to\n",
            "  - effective_date: not-a-date\n",
        );
        let (_, errors) = run(body, 1, &["Alice", "Bob"], &[]);
        assert!(mentions(&errors, "YYYY"));
    }

    #[test]
    fn multiple_source_overrides() {
        let body = concat!(
            "\n",
            "- Alice -> Bob: related_to\n",
            "  - source: https://first.com\n",
            "  - source: https://second.com\n",
        );
        let (rels, errors) = run(body, 1, &["Alice", "Bob"], &[]);

        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls.len(), 2);
    }
}