1use std::collections::{HashMap, HashSet};
7
8use chrono::{DateTime, Utc};
9use rand::prelude::*;
10use rand_chacha::ChaCha8Rng;
11use serde::{Deserialize, Serialize};
12use serde_json::Value;
13use uuid::Uuid;
14
15use super::rules::{
16 CardinalityRule, PropertyGenerator, PropertyValueType, RelationshipConfig,
17 RelationshipTypeConfig, RelationshipValidation,
18};
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct GeneratedRelationship {
23 pub relationship_type: String,
25 pub id: String,
27 pub source_id: String,
29 pub target_id: String,
31 pub properties: HashMap<String, Value>,
33 pub metadata: RelationshipMetadata,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct RelationshipMetadata {
40 pub source: String,
42 pub generated_at: DateTime<Utc>,
44 pub weight: Option<f64>,
46 pub valid_from: Option<DateTime<Utc>>,
48 pub valid_to: Option<DateTime<Utc>>,
50 pub labels: HashMap<String, String>,
52 pub features: Option<Vec<f64>>,
54 pub is_directed: bool,
56}
57
58impl Default for RelationshipMetadata {
59 fn default() -> Self {
60 Self {
61 source: "datasynth".to_string(),
62 generated_at: Utc::now(),
63 weight: None,
64 valid_from: None,
65 valid_to: None,
66 labels: HashMap::new(),
67 features: None,
68 is_directed: true,
69 }
70 }
71}
72
73#[derive(Debug, Clone)]
75pub struct NodeRef {
76 pub id: String,
78 pub node_type: String,
80 pub properties: HashMap<String, Value>,
82}
83
84impl NodeRef {
85 pub fn new(id: impl Into<String>, node_type: impl Into<String>) -> Self {
87 Self {
88 id: id.into(),
89 node_type: node_type.into(),
90 properties: HashMap::new(),
91 }
92 }
93
94 pub fn with_property(mut self, key: impl Into<String>, value: Value) -> Self {
96 self.properties.insert(key.into(), value);
97 self
98 }
99}
100
101pub struct RelationshipGenerator {
103 config: RelationshipConfig,
105 rng: ChaCha8Rng,
107 count: u64,
109 relationships_by_source: HashMap<String, HashMap<String, Vec<String>>>,
111 relationships_by_target: HashMap<String, HashMap<String, Vec<String>>>,
113 visited: HashSet<String>,
115}
116
117impl RelationshipGenerator {
118 pub fn new(config: RelationshipConfig, seed: u64) -> Self {
120 Self {
121 config,
122 rng: ChaCha8Rng::seed_from_u64(seed),
123 count: 0,
124 relationships_by_source: HashMap::new(),
125 relationships_by_target: HashMap::new(),
126 visited: HashSet::new(),
127 }
128 }
129
130 pub fn with_defaults(seed: u64) -> Self {
132 Self::new(RelationshipConfig::default(), seed)
133 }
134
135 pub fn generate_relationships(&mut self, nodes: &[NodeRef]) -> Vec<GeneratedRelationship> {
137 let mut relationships = Vec::new();
138
139 let nodes_by_type = self.group_nodes_by_type(nodes);
141
142 let relationship_types = self.config.relationship_types.clone();
144
145 for rel_type in &relationship_types {
147 let rels = self.generate_for_type(rel_type, &nodes_by_type);
148 relationships.extend(rels);
149 }
150
151 relationships
152 }
153
154 pub fn generate_for_node(
156 &mut self,
157 node: &NodeRef,
158 available_targets: &HashMap<String, Vec<NodeRef>>,
159 ) -> Vec<GeneratedRelationship> {
160 if self.config.allow_orphans && self.rng.gen_bool(self.config.orphan_probability) {
162 return Vec::new();
163 }
164
165 let mut relationships = Vec::new();
166
167 let applicable_types: Vec<_> = self
169 .config
170 .relationship_types
171 .iter()
172 .filter(|rt| rt.source_type == node.node_type)
173 .cloned()
174 .collect();
175
176 for rel_type in &applicable_types {
177 if let Some(targets) = available_targets.get(&rel_type.target_type) {
178 let rels = self.generate_edges_for_node(node, targets, rel_type);
179 relationships.extend(rels);
180 }
181 }
182
183 relationships
184 }
185
186 pub fn check_cardinality(
188 &self,
189 source_id: &str,
190 target_id: &str,
191 rel_type: &str,
192 ) -> RelationshipValidation {
193 let type_config = self
195 .config
196 .relationship_types
197 .iter()
198 .find(|rt| rt.name == rel_type);
199
200 let Some(type_config) = type_config else {
201 return RelationshipValidation::invalid(format!(
202 "Unknown relationship type: {}",
203 rel_type
204 ));
205 };
206
207 let (_min, max) = type_config.cardinality.bounds();
208
209 let current_count = self
211 .relationships_by_source
212 .get(source_id)
213 .and_then(|m| m.get(rel_type))
214 .map(|v| v.len())
215 .unwrap_or(0);
216
217 if current_count >= max as usize {
218 return RelationshipValidation::invalid(format!(
219 "Source {} already has maximum {} {} relationships",
220 source_id, max, rel_type
221 ));
222 }
223
224 if matches!(
226 type_config.cardinality,
227 CardinalityRule::OneToOne | CardinalityRule::ManyToOne { .. }
228 ) {
229 let target_count = self
230 .relationships_by_target
231 .get(target_id)
232 .and_then(|m| m.get(rel_type))
233 .map(|v| v.len())
234 .unwrap_or(0);
235
236 if target_count > 0 {
237 return RelationshipValidation::invalid(format!(
238 "Target {} already has a {} relationship",
239 target_id, rel_type
240 ));
241 }
242 }
243
244 RelationshipValidation::valid()
245 }
246
247 pub fn check_circular(&mut self, source_id: &str, target_id: &str) -> bool {
249 if !self.config.allow_circular {
250 if source_id == target_id {
252 return true;
253 }
254
255 self.visited.clear();
257 self.visited.insert(source_id.to_string());
258
259 return self.has_path_to(target_id, source_id, 0);
260 }
261
262 false
263 }
264
265 pub fn count(&self) -> u64 {
267 self.count
268 }
269
270 pub fn reset(&mut self, seed: u64) {
272 self.rng = ChaCha8Rng::seed_from_u64(seed);
273 self.count = 0;
274 self.relationships_by_source.clear();
275 self.relationships_by_target.clear();
276 self.visited.clear();
277 }
278
279 pub fn config(&self) -> &RelationshipConfig {
281 &self.config
282 }
283
284 fn group_nodes_by_type(&self, nodes: &[NodeRef]) -> HashMap<String, Vec<NodeRef>> {
286 let mut grouped: HashMap<String, Vec<NodeRef>> = HashMap::new();
287
288 for node in nodes {
289 grouped
290 .entry(node.node_type.clone())
291 .or_default()
292 .push(node.clone());
293 }
294
295 grouped
296 }
297
298 fn generate_for_type(
300 &mut self,
301 rel_type: &RelationshipTypeConfig,
302 nodes_by_type: &HashMap<String, Vec<NodeRef>>,
303 ) -> Vec<GeneratedRelationship> {
304 let mut relationships = Vec::new();
305
306 let Some(source_nodes) = nodes_by_type.get(&rel_type.source_type) else {
307 return relationships;
308 };
309
310 let Some(target_nodes) = nodes_by_type.get(&rel_type.target_type) else {
311 return relationships;
312 };
313
314 for source in source_nodes {
315 let rels = self.generate_edges_for_node(source, target_nodes, rel_type);
316 relationships.extend(rels);
317 }
318
319 relationships
320 }
321
322 fn generate_edges_for_node(
324 &mut self,
325 source: &NodeRef,
326 targets: &[NodeRef],
327 rel_type: &RelationshipTypeConfig,
328 ) -> Vec<GeneratedRelationship> {
329 let mut relationships = Vec::new();
330
331 if targets.is_empty() {
332 return relationships;
333 }
334
335 let (min, max) = rel_type.cardinality.bounds();
337 let count = if min == max {
338 min as usize
339 } else {
340 self.rng.gen_range(min..=max) as usize
341 };
342
343 let available_targets: Vec<_> = targets
345 .iter()
346 .filter(|t| {
347 let validation = self.check_cardinality(&source.id, &t.id, &rel_type.name);
349 if !validation.valid {
350 return false;
351 }
352
353 if self.check_circular(&source.id, &t.id) {
355 return false;
356 }
357
358 true
359 })
360 .collect();
361
362 if available_targets.is_empty() && rel_type.required {
363 return relationships;
365 }
366
367 let selected_count = count.min(available_targets.len());
369 let mut selected_indices: Vec<usize> = (0..available_targets.len()).collect();
370 selected_indices.shuffle(&mut self.rng);
371 selected_indices.truncate(selected_count);
372
373 for idx in selected_indices {
374 let target = available_targets[idx];
375 let relationship = self.create_relationship(source, target, rel_type);
376
377 self.track_relationship(&source.id, &target.id, &rel_type.name);
379
380 relationships.push(relationship);
381 }
382
383 relationships
384 }
385
386 fn create_relationship(
388 &mut self,
389 source: &NodeRef,
390 target: &NodeRef,
391 rel_type: &RelationshipTypeConfig,
392 ) -> GeneratedRelationship {
393 self.count += 1;
394
395 let id = Uuid::new_v4().to_string();
396 let properties = self.generate_properties(source, target, &rel_type.properties);
397
398 let metadata = RelationshipMetadata {
399 source: "datasynth".to_string(),
400 generated_at: Utc::now(),
401 weight: Some(rel_type.weight),
402 valid_from: None,
403 valid_to: None,
404 labels: HashMap::new(),
405 features: None,
406 is_directed: rel_type.directed,
407 };
408
409 GeneratedRelationship {
410 relationship_type: rel_type.name.clone(),
411 id,
412 source_id: source.id.clone(),
413 target_id: target.id.clone(),
414 properties,
415 metadata,
416 }
417 }
418
419 fn generate_properties(
421 &mut self,
422 source: &NodeRef,
423 target: &NodeRef,
424 rules: &[super::rules::PropertyGenerationRule],
425 ) -> HashMap<String, Value> {
426 let mut properties = HashMap::new();
427
428 for rule in rules {
429 let value =
430 self.generate_property_value(source, target, &rule.generator, &rule.value_type);
431 properties.insert(rule.name.clone(), value);
432 }
433
434 properties
435 }
436
437 fn generate_property_value(
439 &mut self,
440 source: &NodeRef,
441 target: &NodeRef,
442 generator: &PropertyGenerator,
443 value_type: &PropertyValueType,
444 ) -> Value {
445 match generator {
446 PropertyGenerator::Constant(value) => value.clone(),
447
448 PropertyGenerator::RandomChoice(choices) => {
449 if choices.is_empty() {
450 Value::Null
451 } else {
452 let idx = self.rng.gen_range(0..choices.len());
453 choices[idx].clone()
454 }
455 }
456
457 PropertyGenerator::Range { min, max } => {
458 let value = self.rng.gen_range(*min..=*max);
459 match value_type {
460 PropertyValueType::Integer => {
461 Value::Number(serde_json::Number::from(value as i64))
462 }
463 _ => Value::Number(
464 serde_json::Number::from_f64(value)
465 .unwrap_or_else(|| serde_json::Number::from(0)),
466 ),
467 }
468 }
469
470 PropertyGenerator::FromSourceProperty(prop_name) => source
471 .properties
472 .get(prop_name)
473 .cloned()
474 .unwrap_or(Value::Null),
475
476 PropertyGenerator::FromTargetProperty(prop_name) => target
477 .properties
478 .get(prop_name)
479 .cloned()
480 .unwrap_or(Value::Null),
481
482 PropertyGenerator::Uuid => Value::String(Uuid::new_v4().to_string()),
483
484 PropertyGenerator::Timestamp => Value::String(Utc::now().to_rfc3339()),
485 }
486 }
487
488 fn track_relationship(&mut self, source_id: &str, target_id: &str, rel_type: &str) {
490 self.relationships_by_source
492 .entry(source_id.to_string())
493 .or_default()
494 .entry(rel_type.to_string())
495 .or_default()
496 .push(target_id.to_string());
497
498 self.relationships_by_target
500 .entry(target_id.to_string())
501 .or_default()
502 .entry(rel_type.to_string())
503 .or_default()
504 .push(source_id.to_string());
505 }
506
507 fn has_path_to(&mut self, current: &str, target: &str, depth: u32) -> bool {
509 if depth >= self.config.max_circular_depth {
510 return false;
511 }
512
513 if current == target {
514 return true;
515 }
516
517 if self.visited.contains(current) {
518 return false;
519 }
520
521 self.visited.insert(current.to_string());
522
523 let next_nodes: Vec<String> = self
525 .relationships_by_source
526 .get(current)
527 .map(|outgoing| outgoing.values().flatten().cloned().collect())
528 .unwrap_or_default();
529
530 for next in next_nodes {
532 if self.has_path_to(&next, target, depth + 1) {
533 return true;
534 }
535 }
536
537 false
538 }
539}
540
541pub struct RelationshipConfigBuilder {
543 config: RelationshipConfig,
544}
545
546impl RelationshipConfigBuilder {
547 pub fn new() -> Self {
549 Self {
550 config: RelationshipConfig::default(),
551 }
552 }
553
554 pub fn add_type(mut self, type_config: RelationshipTypeConfig) -> Self {
556 self.config.relationship_types.push(type_config);
557 self
558 }
559
560 pub fn allow_orphans(mut self, allow: bool) -> Self {
562 self.config.allow_orphans = allow;
563 self
564 }
565
566 pub fn orphan_probability(mut self, prob: f64) -> Self {
568 self.config.orphan_probability = prob.clamp(0.0, 1.0);
569 self
570 }
571
572 pub fn allow_circular(mut self, allow: bool) -> Self {
574 self.config.allow_circular = allow;
575 self
576 }
577
578 pub fn max_circular_depth(mut self, depth: u32) -> Self {
580 self.config.max_circular_depth = depth;
581 self
582 }
583
584 pub fn build(self) -> RelationshipConfig {
586 self.config
587 }
588}
589
590impl Default for RelationshipConfigBuilder {
591 fn default() -> Self {
592 Self::new()
593 }
594}
595
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Two journal entries, three accounts and one user.
    fn create_test_nodes() -> Vec<NodeRef> {
        vec![
            NodeRef::new("je_1", "journal_entry"),
            NodeRef::new("je_2", "journal_entry"),
            NodeRef::new("acc_1", "account"),
            NodeRef::new("acc_2", "account"),
            NodeRef::new("acc_3", "account"),
            NodeRef::new("user_1", "user"),
        ]
    }

    #[test]
    fn test_generate_relationships() {
        let config = RelationshipConfig::with_types(vec![RelationshipTypeConfig::new(
            "debits",
            "journal_entry",
            "account",
        )
        .with_cardinality(CardinalityRule::one_to_many(1, 2))]);

        let mut generator = RelationshipGenerator::new(config, 42);
        let nodes = create_test_nodes();
        let relationships = generator.generate_relationships(&nodes);

        assert!(!relationships.is_empty());
        for rel in &relationships {
            assert_eq!(rel.relationship_type, "debits");
            assert!(rel.source_id.starts_with("je_"));
            assert!(rel.target_id.starts_with("acc_"));
        }
    }

    #[test]
    fn test_cardinality_validation() {
        let config = RelationshipConfig::with_types(vec![RelationshipTypeConfig::new(
            "debits",
            "journal_entry",
            "account",
        )
        .with_cardinality(CardinalityRule::one_to_one())]);

        let generator = RelationshipGenerator::new(config, 42);

        // A configured type with no tracked edges is valid.
        let validation = generator.check_cardinality("je_1", "acc_1", "debits");
        assert!(validation.valid);

        // An unconfigured type is rejected.
        let validation = generator.check_cardinality("je_1", "acc_1", "unknown");
        assert!(!validation.valid);
    }

    #[test]
    fn test_circular_detection() {
        let config = RelationshipConfig::default()
            .allow_circular(false)
            .max_circular_depth(3);

        let mut generator = RelationshipGenerator::new(config, 42);

        // A self-loop is always circular.
        assert!(generator.check_circular("a", "a"));

        // With no tracked edges, a -> b cannot close a cycle.
        assert!(!generator.check_circular("a", "b"));

        // Once a -> b is tracked, adding b -> a would close a cycle.
        generator.track_relationship("a", "b", "links");
        assert!(generator.check_circular("b", "a"));

        // An edge to a node with no outgoing edges still closes no cycle.
        assert!(!generator.check_circular("a", "c"));
    }

    #[test]
    fn test_property_generation() {
        let config = RelationshipConfig::with_types(vec![RelationshipTypeConfig::new(
            "test", "source", "target",
        )
        .with_property(super::super::rules::PropertyGenerationRule::range(
            "amount", 100.0, 1000.0,
        ))
        .with_property(
            super::super::rules::PropertyGenerationRule::constant_string("status", "active"),
        )]);

        let mut generator = RelationshipGenerator::new(config, 42);
        let nodes = vec![NodeRef::new("s1", "source"), NodeRef::new("t1", "target")];

        let relationships = generator.generate_relationships(&nodes);

        assert!(!relationships.is_empty());
        let rel = &relationships[0];
        assert!(rel.properties.contains_key("amount"));
        assert!(rel.properties.contains_key("status"));
        assert_eq!(
            rel.properties.get("status"),
            Some(&Value::String("active".into()))
        );
    }

    #[test]
    fn test_orphan_generation() {
        // With probability 1.0 every node is left as an orphan.
        let config = RelationshipConfig::with_types(vec![RelationshipTypeConfig::new(
            "test", "source", "target",
        )
        .with_cardinality(CardinalityRule::one_to_one())])
        .allow_orphans(true)
        .orphan_probability(1.0);

        let mut generator = RelationshipGenerator::new(config, 42);

        let source = NodeRef::new("s1", "source");
        let available: HashMap<String, Vec<NodeRef>> =
            [("target".to_string(), vec![NodeRef::new("t1", "target")])]
                .into_iter()
                .collect();

        let relationships = generator.generate_for_node(&source, &available);
        assert!(relationships.is_empty());
    }

    #[test]
    fn test_config_builder() {
        let config = RelationshipConfigBuilder::new()
            .add_type(RelationshipTypeConfig::new("test", "a", "b"))
            .allow_orphans(false)
            .orphan_probability(0.1)
            .allow_circular(true)
            .max_circular_depth(5)
            .build();

        assert_eq!(config.relationship_types.len(), 1);
        assert!(!config.allow_orphans);
        assert_eq!(config.orphan_probability, 0.1);
        assert!(config.allow_circular);
        assert_eq!(config.max_circular_depth, 5);
    }

    #[test]
    fn test_generator_count_and_reset() {
        let config = RelationshipConfig::with_types(vec![RelationshipTypeConfig::new(
            "test", "source", "target",
        )
        .with_cardinality(CardinalityRule::one_to_one())]);

        let mut generator = RelationshipGenerator::new(config, 42);
        assert_eq!(generator.count(), 0);

        let nodes = vec![NodeRef::new("s1", "source"), NodeRef::new("t1", "target")];
        generator.generate_relationships(&nodes);

        assert!(generator.count() > 0);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }
}