1use crate::parser::ParseError;
2
/// Maximum number of relationships `parse_relationships` accepts from one call.
///
/// Exceeding it adds a single error anchored at the section start line; the parsed
/// relationships are still returned in full.
const MAX_RELATIONSHIPS_PER_FILE: usize = 200;
5
/// Relationship types accepted by `parse_relationships`.
///
/// The comparison is made against the normalised type produced by `parse_rel_line`
/// (lower-cased, spaces replaced by underscores). A type missing from this list is
/// reported as an error, but the relationship is still built and returned.
const KNOWN_REL_TYPES: &[&str] = &[
    "affiliated_with",
    "appointed_by",
    "charged_in",
    "contracted_with",
    "convicted_in",
    "donated_to",
    "employed_by",
    "endorsed_by",
    "family_of",
    "funded_by",
    "investigated_by",
    "lobbied_for",
    "member_of",
    "next",
    "owns",
    "related_to",
    "represented_by",
    "sponsored_by",
    "sued_by",
    "transferred_to",
];
29
/// Keys accepted on nested `- key: value` lines below a relationship.
///
/// `id` and `source` are stored in dedicated slots of the relationship; every other
/// key listed here is kept as a generic `(key, value)` pair. A key that is not listed
/// is reported as an error and its value is dropped.
const REL_FIELDS: &[&str] = &[
    "id",
    "source",
    "description",
    "amount",
    "currency",
    "effective_date",
    "expiry_date",
];
40
/// A single parsed relationship between two named entities.
#[derive(Debug)]
// NOTE(review): presumably silences the lint for `rel_type`, which repeats the struct
// name — confirm.
#[allow(clippy::struct_field_names)]
pub struct Rel {
    /// Entity name written to the left of ` -> `, trimmed.
    pub source_name: String,
    /// Entity name written between ` -> ` and the last `:`, trimmed.
    pub target_name: String,
    /// Relationship type, lower-cased with spaces replaced by underscores.
    pub rel_type: String,
    /// URLs from nested `source` fields, or the caller's default sources when the
    /// relationship has none of its own.
    pub source_urls: Vec<String>,
    /// Remaining nested fields as `(key, value)` pairs, in the order they appeared.
    pub fields: Vec<(String, String)>,
    /// Value of the nested `id` field, if one was given.
    pub id: Option<String>,
    /// Line number recorded for the relationship's own `- Source -> Target: type` line
    /// (`section_start_line + 1 +` its index within the section body).
    pub line: usize,
}
55
56#[allow(clippy::too_many_lines)]
61pub fn parse_relationships(
62 body: &str,
63 section_start_line: usize,
64 entity_names: &[&str],
65 default_sources: &[String],
66 errors: &mut Vec<ParseError>,
67) -> Vec<Rel> {
68 let lines: Vec<&str> = body.lines().collect();
69 let mut rels: Vec<Rel> = Vec::new();
70
71 let mut current: Option<RelBuilder> = None;
73
74 for (i, line) in lines.iter().enumerate() {
75 let file_line = section_start_line + 1 + i;
76 let trimmed = line.trim();
77
78 if trimmed.starts_with("- ") && !line.starts_with(" ") {
80 if let Some(builder) = current.take() {
82 rels.push(builder.finish(default_sources));
83 }
84
85 let item = &trimmed[2..];
86 match parse_rel_line(item) {
87 Some((source, target, rel_type)) => {
88 if !KNOWN_REL_TYPES.contains(&rel_type.as_str()) {
90 errors.push(ParseError {
91 line: file_line,
92 message: format!(
93 "unknown relationship type {rel_type:?} (known: {})",
94 KNOWN_REL_TYPES.join(", ")
95 ),
96 });
97 }
98
99 if !entity_names.contains(&source.as_str()) {
101 errors.push(ParseError {
102 line: file_line,
103 message: format!(
104 "entity {source:?} in relationship not defined in file"
105 ),
106 });
107 }
108 if !entity_names.contains(&target.as_str()) {
109 errors.push(ParseError {
110 line: file_line,
111 message: format!(
112 "entity {target:?} in relationship not defined in file"
113 ),
114 });
115 }
116
117 current = Some(RelBuilder {
118 source_name: source,
119 target_name: target,
120 rel_type,
121 source_urls: Vec::new(),
122 fields: Vec::new(),
123 id: None,
124 line: file_line,
125 });
126 }
127 None => {
128 errors.push(ParseError {
129 line: file_line,
130 message: format!(
131 "invalid relationship syntax: expected `- Source -> Target: type`, got {trimmed:?}"
132 ),
133 });
134 }
135 }
136 continue;
137 }
138
139 if line.starts_with(" - ") && current.is_some() {
141 let nested = trimmed.strip_prefix("- ").unwrap_or(trimmed);
142 if let Some((key, value)) = parse_kv(nested) {
143 if !REL_FIELDS.contains(&key.as_str()) {
144 errors.push(ParseError {
145 line: file_line,
146 message: format!("unknown relationship field {key:?}"),
147 });
148 continue;
149 }
150
151 let builder = current.as_mut().unwrap_or_else(|| unreachable!());
152
153 if key == "id" {
154 builder.id = Some(value);
155 } else if key == "source" {
156 if !value.starts_with("https://") {
157 errors.push(ParseError {
158 line: file_line,
159 message: format!("relationship source URL must be HTTPS: {value:?}"),
160 });
161 }
162 builder.source_urls.push(value);
163 } else {
164 validate_rel_field(&key, &value, file_line, errors);
166 builder.fields.push((key, value));
167 }
168 } else {
169 errors.push(ParseError {
170 line: file_line,
171 message: format!(
172 "invalid nested field syntax: expected `- key: value`, got {trimmed:?}"
173 ),
174 });
175 }
176 }
177
178 }
180
181 if let Some(builder) = current.take() {
183 rels.push(builder.finish(default_sources));
184 }
185
186 if rels.len() > MAX_RELATIONSHIPS_PER_FILE {
188 errors.push(ParseError {
189 line: section_start_line,
190 message: format!(
191 "too many relationships (max {MAX_RELATIONSHIPS_PER_FILE}, got {})",
192 rels.len()
193 ),
194 });
195 }
196
197 rels
198}
199
/// Accumulates one relationship while its nested field lines are being read.
///
/// The fields mirror those of `Rel`; `finish` turns the builder into a `Rel`.
struct RelBuilder {
    /// Entity name on the left of ` -> `.
    source_name: String,
    /// Entity name on the right of ` -> `.
    target_name: String,
    /// Normalised relationship type.
    rel_type: String,
    /// URLs collected from nested `source` fields; empty until one is seen.
    source_urls: Vec<String>,
    /// Other nested fields as `(key, value)` pairs, in input order.
    fields: Vec<(String, String)>,
    /// Value of the nested `id` field, if any.
    id: Option<String>,
    /// Line number of the relationship's item line.
    line: usize,
}
209
210impl RelBuilder {
211 fn finish(self, default_sources: &[String]) -> Rel {
212 let source_urls = if self.source_urls.is_empty() {
213 default_sources.to_vec()
214 } else {
215 self.source_urls
216 };
217
218 Rel {
219 source_name: self.source_name,
220 target_name: self.target_name,
221 rel_type: self.rel_type,
222 source_urls,
223 fields: self.fields,
224 id: self.id,
225 line: self.line,
226 }
227 }
228}
229
/// Splits `Source -> Target: type` into `(source, target, rel_type)`.
///
/// The source ends at the first ` -> `; the type starts after the last `:` that
/// follows it, so a target may itself contain colons. Source and target are trimmed.
/// The type is trimmed, lower-cased, and has its spaces replaced by underscores.
///
/// Returns `None` when the arrow or the colon is missing, or when any of the three
/// parts is empty after normalisation.
fn parse_rel_line(item: &str) -> Option<(String, String, String)> {
    let (lhs, rest) = item.split_once(" -> ")?;
    let (middle, kind) = rest.rsplit_once(':')?;

    let source = lhs.trim();
    let target = middle.trim();
    let rel_type = kind.trim().to_lowercase().replace(' ', "_");

    let any_empty = [source, target, rel_type.as_str()]
        .iter()
        .any(|part| part.is_empty());
    if any_empty {
        return None;
    }

    Some((source.to_owned(), target.to_owned(), rel_type))
}
249
/// Splits `key: value` at the first `:` and trims both sides.
///
/// Returns `None` when there is no colon or the key is empty; an empty value is
/// allowed.
fn parse_kv(s: &str) -> Option<(String, String)> {
    let (raw_key, raw_value) = s.split_once(':')?;
    let key = raw_key.trim();
    (!key.is_empty()).then(|| (key.to_owned(), raw_value.trim().to_owned()))
}
259
260fn validate_rel_field(key: &str, value: &str, line: usize, errors: &mut Vec<ParseError>) {
261 let max = match key {
262 "description" => 1000,
263 "amount" => 50,
264 "currency" | "effective_date" | "expiry_date" => 10,
265 _ => return,
266 };
267
268 if value.len() > max {
269 errors.push(ParseError {
270 line,
271 message: format!(
272 "relationship field {key:?} exceeds {max} chars (got {})",
273 value.len()
274 ),
275 });
276 }
277
278 if matches!(key, "effective_date" | "expiry_date") && !value.is_empty() {
280 let valid = matches!(value.len(), 4 | 7 | 10)
281 && value.chars().enumerate().all(|(i, c)| match i {
282 4 | 7 => c == '-',
283 _ => c.is_ascii_digit(),
284 });
285 if !valid {
286 errors.push(ParseError {
287 line,
288 message: format!(
289 "relationship field {key:?} must be YYYY, YYYY-MM, or YYYY-MM-DD, got {value:?}"
290 ),
291 });
292 }
293 }
294}
295
/// Unit tests for relationship parsing and its helper functions.
#[cfg(test)]
mod tests {
    use super::*;

    /// A single well-formed relationship parses without errors.
    #[test]
    fn parse_basic_relationship() {
        let body = "\n- Alice -> Bob: employed_by\n";
        let names = vec!["Alice", "Bob"];
        let sources = vec!["https://example.com/src".to_string()];
        let mut errors = Vec::new();

        let rels = parse_relationships(body, 50, &names, &sources, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 1);
        assert_eq!(rels[0].source_name, "Alice");
        assert_eq!(rels[0].target_name, "Bob");
        assert_eq!(rels[0].rel_type, "employed_by");
        // No nested `source` was given, so the default sources are used.
        assert_eq!(rels[0].source_urls, vec!["https://example.com/src"]);
    }

    /// A nested `source` field replaces the default sources.
    #[test]
    fn parse_relationship_with_source_override() {
        let body = [
            "",
            "- Alice -> Bob: related_to",
            " - source: https://specific.com/article",
            "",
        ]
        .join("\n");
        let names = vec!["Alice", "Bob"];
        let sources = vec!["https://default.com".to_string()];
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 10, &names, &sources, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls, vec!["https://specific.com/article"]);
    }

    /// Known nested fields other than `id` and `source` are collected in `fields`.
    #[test]
    fn parse_relationship_with_fields() {
        let body = [
            "",
            "- Alice -> Corp: donated_to",
            " - amount: EUR 50,000",
            " - currency: EUR",
            " - effective_date: 2020-01",
            " - description: Campaign donation",
            "",
        ]
        .join("\n");
        let names = vec!["Alice", "Corp"];
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 10, &names, &[], &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].fields.len(), 4);
    }

    /// A relationship type outside `KNOWN_REL_TYPES` is reported.
    #[test]
    fn reject_unknown_rel_type() {
        let body = "\n- Alice -> Bob: best_friends\n";
        let names = vec!["Alice", "Bob"];
        let mut errors = Vec::new();

        parse_relationships(body, 1, &names, &[], &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("unknown relationship type"))
        );
    }

    /// An endpoint that is not among the entity names is reported.
    #[test]
    fn reject_unresolved_entity() {
        let body = "\n- Alice -> Unknown: employed_by\n";
        let names = vec!["Alice"];
        let mut errors = Vec::new();

        parse_relationships(body, 1, &names, &[], &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("not defined in file"))
        );
    }

    /// A nested `source` URL that does not start with `https://` is reported.
    #[test]
    fn reject_non_https_source_override() {
        let body = [
            "",
            "- Alice -> Bob: related_to",
            " - source: http://insecure.com",
            "",
        ]
        .join("\n");
        let names = vec!["Alice", "Bob"];
        let mut errors = Vec::new();

        parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("HTTPS")));
    }

    /// A nested key outside `REL_FIELDS` is reported.
    #[test]
    fn reject_unknown_rel_field() {
        let body = ["", "- Alice -> Bob: related_to", " - foobar: value", ""].join("\n");
        let names = vec!["Alice", "Bob"];
        let mut errors = Vec::new();

        parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("unknown relationship field"))
        );
    }

    /// Consecutive relationship items each produce their own entry.
    #[test]
    fn multiple_relationships() {
        let body = [
            "",
            "- Alice -> Bob: employed_by",
            "- Bob -> Corp: member_of",
            "- Corp -> Alice: sued_by",
            "",
        ]
        .join("\n");
        let names = vec!["Alice", "Bob", "Corp"];
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels.len(), 3);
    }

    /// `parse_rel_line` handles names that contain spaces.
    #[test]
    fn parse_rel_line_syntax() {
        let result = parse_rel_line("Mark Bonnick -> Arsenal FC: employed_by");
        assert_eq!(
            result,
            Some((
                "Mark Bonnick".into(),
                "Arsenal FC".into(),
                "employed_by".into()
            ))
        );
    }

    /// `parse_rel_line` rejects input with a missing arrow, source, or target.
    #[test]
    fn parse_rel_line_invalid() {
        assert!(parse_rel_line("not a relationship").is_none());
        assert!(parse_rel_line("-> Target: type").is_none());
        assert!(parse_rel_line("Source -> : type").is_none());
    }

    /// A date field that does not have a YYYY[-MM[-DD]] shape is reported.
    #[test]
    fn relationship_date_validation() {
        let body = [
            "",
            "- Alice -> Bob: related_to",
            " - effective_date: not-a-date",
            "",
        ]
        .join("\n");
        let names = vec!["Alice", "Bob"];
        let mut errors = Vec::new();

        parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("YYYY")));
    }

    /// Repeated nested `source` fields accumulate instead of overwriting each other.
    #[test]
    fn multiple_source_overrides() {
        let body = [
            "",
            "- Alice -> Bob: related_to",
            " - source: https://first.com",
            " - source: https://second.com",
            "",
        ]
        .join("\n");
        let names = vec!["Alice", "Bob"];
        let mut errors = Vec::new();

        let rels = parse_relationships(&body, 1, &names, &[], &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(rels[0].source_urls.len(), 2);
    }
}