1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
10use std::fs;
11use std::path::{Path, PathBuf};
12
13use anyhow::{anyhow, bail, Context, Result};
14use csv::Writer;
15use dsfb::sim::SimConfig;
16use dsfb::DsfbParams;
17use dsfb_add::aet::run_aet_sweep;
18use dsfb_add::iwlt::run_iwlt_sweep;
19use dsfb_add::SimulationConfig as AddSimulationConfig;
20use rayon::prelude::*;
21use serde::{Deserialize, Serialize};
22
23use crate::integrations::{generate_dscd_events_from_dsfb, DscdObserverSample, ResidualState};
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct DscdParams {
34 pub num_events: usize,
35 pub tau_min: f64,
36 pub tau_max: f64,
37 pub tau_steps: usize,
38 pub max_temporal_lag: usize,
39 pub max_structural_radius: f64,
40 pub temporal_decay_gamma: f64,
41 pub structural_decay_beta: f64,
42 pub max_out_edges: usize,
43}
44
45impl DscdParams {
46 pub fn demo() -> Self {
48 Self {
49 num_events: 10_000,
50 tau_min: 0.0,
51 tau_max: 1.0,
52 tau_steps: 64,
53 max_temporal_lag: 64,
54 max_structural_radius: 0.1,
55 temporal_decay_gamma: 0.0,
56 structural_decay_beta: 2.0,
57 max_out_edges: 8,
58 }
59 }
60
61 pub fn performance() -> Self {
63 Self {
64 num_events: 100_000,
65 tau_min: 0.0,
66 tau_max: 1.0,
67 tau_steps: 256,
68 max_temporal_lag: 64,
69 max_structural_radius: 0.1,
70 temporal_decay_gamma: 0.0,
71 structural_decay_beta: 2.0,
72 max_out_edges: 8,
73 }
74 }
75
76 pub fn tau_grid(&self) -> Vec<f64> {
77 linspace(self.tau_min, self.tau_max, self.tau_steps)
78 }
79}
80
81impl Default for DscdParams {
82 fn default() -> Self {
83 Self::demo()
84 }
85}
86
87#[derive(Debug, Clone)]
89pub struct DscdConfig {
90 pub params: DscdParams,
91 pub root_event_id: u64,
92 pub output_dir: PathBuf,
93 pub scaling_ns: Vec<usize>,
94}
95
96impl Default for DscdConfig {
97 fn default() -> Self {
98 Self {
99 params: DscdParams::default(),
100 root_event_id: 0,
101 output_dir: PathBuf::from("output-dsfb-dscd"),
102 scaling_ns: vec![5_000, 10_000, 20_000, 50_000, 100_000],
103 }
104 }
105}
106
107#[derive(Debug, Clone, Serialize)]
113pub struct DscdEvent {
114 pub id: u64,
115 pub time_index: u64,
116 pub module_id: u32,
117 pub residual_state: String,
118 pub trust: f64,
119 pub echo_slope: f64,
120 pub entropy_density: f64,
121 pub structural_level: f64,
122}
123
124#[derive(Debug, Clone, Serialize)]
126pub struct DscdEdge {
127 pub edge_id: u64,
128 pub src: u64,
129 pub dst: u64,
130 pub module_id: u32,
131 pub rewrite_rule_id: Option<String>,
132 pub base_trust: f64,
133 pub effective_trust: f64,
134 pub trust_at_creation: f64,
135 pub temporal_lag: usize,
136 pub structural_distance: f64,
137 pub structural_level_source: f64,
138 pub structural_level_target: f64,
139}
140
141#[derive(Debug, Clone, Default)]
143pub struct DscdGraph {
144 pub events: Vec<DscdEvent>,
145 pub edges: Vec<DscdEdge>,
146}
147
148pub type DscdError = anyhow::Error;
150
151#[derive(Debug, Clone)]
152struct CurvePoint {
153 tau: f64,
154 expansion_ratio: f64,
155}
156
157#[derive(Debug, Clone)]
158struct ScalingMetrics {
159 num_events: usize,
160 tau_star: f64,
161 transition_width: f64,
162 max_derivative: f64,
163}
164
165#[derive(Debug, Clone)]
166struct EventAux {
167 event: DscdEvent,
168 rewrite_rule_id: String,
169 observer_trust: HashMap<u32, f64>,
170}
171
172#[derive(Debug, Clone)]
173struct Dataset {
174 events: Vec<DscdEvent>,
175 candidate_edges: Vec<DscdEdge>,
176}
177
178#[derive(Debug, Clone)]
179struct CandidateEdgeDraft {
180 src: u64,
181 dst: u64,
182 module_id: u32,
183 rewrite_rule_id: String,
184 base_trust: f64,
185 effective_trust: f64,
186 temporal_lag: usize,
187 structural_distance: f64,
188 structural_level_source: f64,
189 structural_level_target: f64,
190}
191
192pub fn structural_distance(e_i: &DscdEvent, e_j: &DscdEvent) -> f64 {
194 (e_j.structural_level - e_i.structural_level).abs()
195}
196
197pub fn effective_trust(base_tau: f64, dt: usize, d_struct: f64, params: &DscdParams) -> f64 {
199 let dt_f = dt as f64;
200 let decay_temporal = (-params.temporal_decay_gamma * dt_f).exp();
201 let decay_struct = (-params.structural_decay_beta * d_struct).exp();
202 base_tau * decay_temporal * decay_struct
203}
204
205pub fn build_graph_for_tau(
211 events: &[DscdEvent],
212 candidate_edges: &[DscdEdge],
213 tau: f64,
214) -> DscdGraph {
215 let time_by_id: HashMap<u64, u64> = events
216 .iter()
217 .map(|event| (event.id, event.time_index))
218 .collect();
219
220 let mut edges = Vec::new();
221 for edge in candidate_edges {
222 if edge.effective_trust < tau {
223 continue;
224 }
225
226 let Some(&src_time) = time_by_id.get(&edge.src) else {
227 continue;
228 };
229 let Some(&dst_time) = time_by_id.get(&edge.dst) else {
230 continue;
231 };
232
233 if src_time < dst_time {
235 edges.push(edge.clone());
236 }
237 }
238
239 DscdGraph {
240 events: events.to_vec(),
241 edges,
242 }
243}
244
245pub fn compute_reachable_component_size(graph: &DscdGraph, root_id: u64) -> usize {
247 if graph.events.is_empty() || !graph.events.iter().any(|event| event.id == root_id) {
248 return 0;
249 }
250
251 let mut adjacency: HashMap<u64, Vec<u64>> = HashMap::new();
252 for edge in &graph.edges {
253 adjacency.entry(edge.src).or_default().push(edge.dst);
254 }
255 for neighbors in adjacency.values_mut() {
256 neighbors.sort_unstable();
257 }
258
259 let mut queue = VecDeque::from([root_id]);
260 let mut visited = HashSet::from([root_id]);
261 while let Some(current) = queue.pop_front() {
262 if let Some(neighbors) = adjacency.get(¤t) {
263 for next in neighbors {
264 if visited.insert(*next) {
265 queue.push_back(*next);
266 }
267 }
268 }
269 }
270
271 visited.len()
272}
273
274pub fn expansion_ratio(graph: &DscdGraph, root_id: u64) -> f64 {
276 if graph.events.is_empty() {
277 return 0.0;
278 }
279 compute_reachable_component_size(graph, root_id) as f64 / graph.events.len() as f64
280}
281
282pub fn run_dscd_simulation(cfg: &DscdConfig) -> std::result::Result<(), DscdError> {
284 validate_config(cfg)?;
285 fs::create_dir_all(&cfg.output_dir).with_context(|| {
286 format!(
287 "failed to create DSCD output directory {}",
288 cfg.output_dir.display()
289 )
290 })?;
291
292 let mut run_ns = cfg.scaling_ns.clone();
293 run_ns.push(cfg.params.num_events);
294 run_ns.sort_unstable();
295 run_ns.dedup();
296
297 let max_n = run_ns
298 .iter()
299 .copied()
300 .max()
301 .ok_or_else(|| anyhow!("no event counts configured"))?;
302 let dataset = generate_dataset(max_n, &cfg.params)?;
303 let taus = cfg.params.tau_grid();
304
305 let mut summary_rows = Vec::new();
306 let mut curve_by_n: HashMap<usize, Vec<CurvePoint>> = HashMap::new();
307 let mut main_events = Vec::new();
308 let mut main_edges = Vec::new();
309
310 for n in run_ns {
311 let events = dataset.events[..n].to_vec();
312 let candidate_edges: Vec<DscdEdge> = dataset
313 .candidate_edges
314 .iter()
315 .filter(|edge| edge.src < n as u64 && edge.dst < n as u64)
316 .cloned()
317 .collect();
318
319 let curve = compute_threshold_curve(&events, &candidate_edges, &taus, cfg.root_event_id);
320 write_threshold_curve_csv(
321 &cfg.output_dir.join(format!("threshold_curve_N_{n}.csv")),
322 &curve,
323 )?;
324
325 let metrics = compute_scaling_metrics(n, &curve);
326 summary_rows.push(metrics.clone());
327 curve_by_n.insert(n, curve);
328
329 if n == cfg.params.num_events {
330 main_events = events;
331 main_edges = candidate_edges;
332 }
333 }
334
335 write_threshold_scaling_summary_csv(
336 &cfg.output_dir.join("threshold_scaling_summary.csv"),
337 &summary_rows,
338 )?;
339 write_finite_size_scaling_csv(
340 &cfg.output_dir.join("finite_size_scaling.csv"),
341 &summary_rows,
342 )?;
343
344 let Some(main_curve) = curve_by_n.get(&cfg.params.num_events) else {
345 bail!(
346 "missing threshold curve for main num_events={}",
347 cfg.params.num_events
348 );
349 };
350 let main_metrics = compute_scaling_metrics(cfg.params.num_events, main_curve);
351 let main_tau = main_metrics.tau_star;
352
353 write_graph_events_csv(&cfg.output_dir.join("graph_events.csv"), &main_events)?;
354 let final_graph = build_graph_for_tau(&main_events, &main_edges, main_tau);
355 write_graph_edges_csv(&cfg.output_dir.join("graph_edges.csv"), &final_graph.edges)?;
356 write_degree_distribution_csv(
357 &cfg.output_dir.join("degree_distribution.csv"),
358 &final_graph,
359 )?;
360 write_interval_sizes_csv(
361 &cfg.output_dir.join("interval_sizes.csv"),
362 &final_graph,
363 1_024,
364 )?;
365 write_path_lengths_csv(
366 &cfg.output_dir.join("path_lengths.csv"),
367 &final_graph,
368 cfg.root_event_id,
369 )?;
370
371 let provenance_path = cfg.output_dir.join("edge_provenance_0.csv");
372 write_edge_provenance_csv(&provenance_path, &final_graph, main_tau, 10)?;
373 let compatibility_path = cfg.output_dir.join("edge_provenance.csv");
374 write_edge_provenance_csv(&compatibility_path, &final_graph, main_tau, 10)?;
375
376 Ok(())
377}
378
379pub fn linspace(start: f64, end: f64, count: usize) -> Vec<f64> {
381 if count <= 1 {
382 return vec![start];
383 }
384 let step = (end - start) / (count.saturating_sub(1) as f64);
385 (0..count).map(|idx| start + step * idx as f64).collect()
386}
387
388fn validate_config(cfg: &DscdConfig) -> Result<()> {
389 let params = &cfg.params;
390 if params.num_events == 0 {
391 bail!("num_events must be greater than zero");
392 }
393 if params.tau_steps < 2 {
394 bail!("tau_steps must be at least 2");
395 }
396 if !params.tau_min.is_finite() || !params.tau_max.is_finite() {
397 bail!("tau bounds must be finite");
398 }
399 if params.tau_max < params.tau_min {
400 bail!("tau_max must be >= tau_min");
401 }
402 if params.max_temporal_lag == 0 {
403 bail!("max_temporal_lag must be > 0");
404 }
405 if !params.max_structural_radius.is_finite() || params.max_structural_radius <= 0.0 {
406 bail!("max_structural_radius must be > 0");
407 }
408 if !params.temporal_decay_gamma.is_finite() || params.temporal_decay_gamma < 0.0 {
409 bail!("temporal_decay_gamma must be >= 0");
410 }
411 if !params.structural_decay_beta.is_finite() || params.structural_decay_beta < 0.0 {
412 bail!("structural_decay_beta must be >= 0");
413 }
414 if params.max_out_edges == 0 {
415 bail!("max_out_edges must be > 0");
416 }
417 if cfg.scaling_ns.iter().any(|n| *n == 0) {
418 bail!("scaling_ns values must be greater than zero");
419 }
420 Ok(())
421}
422
423fn generate_dataset(max_events: usize, params: &DscdParams) -> Result<Dataset> {
424 let dsfb_cfg = SimConfig {
425 steps: max_events,
426 ..SimConfig::default()
427 };
428 let event_batch = generate_dscd_events_from_dsfb(&dsfb_cfg, DsfbParams::default(), max_events)?;
429
430 let mut add_cfg = AddSimulationConfig::default();
431 let add_steps = if max_events <= 20_000 {
432 max_events.min(2_048)
433 } else {
434 max_events.min(4_096)
435 }
436 .max(128);
437 add_cfg.steps_per_run = add_steps;
438 add_cfg.multi_steps_per_run = vec![add_steps];
439 let lambda_grid = add_cfg.lambda_grid();
440 let aet = run_aet_sweep(&add_cfg, &lambda_grid)
441 .map_err(|error| anyhow!("failed to compute AET sweep for DSCD: {error}"))?;
442 let iwlt = run_iwlt_sweep(&add_cfg, &lambda_grid)
443 .map_err(|error| anyhow!("failed to compute IWLT sweep for DSCD: {error}"))?;
444 let echo_len = aet.echo_slope.len().max(1);
445 let entropy_len = iwlt.entropy_density.len().max(1);
446
447 let mut samples_by_event: BTreeMap<u64, Vec<&DscdObserverSample>> = BTreeMap::new();
448 for sample in &event_batch.observer_samples {
449 samples_by_event
450 .entry(sample.event_id)
451 .or_default()
452 .push(sample);
453 }
454
455 let mut aux_events = Vec::with_capacity(event_batch.events.len());
456 let mut raw_levels = Vec::with_capacity(event_batch.events.len());
457
458 for (idx, event) in event_batch.events.iter().enumerate() {
459 let event_id = event.id.0;
460 let samples = samples_by_event.get(&event_id).cloned().unwrap_or_default();
461
462 let mut trust_sum = 0.0;
463 let mut residual_sum = 0.0;
464 let mut observer_trust = HashMap::new();
465 let mut best_observer = 0_u32;
466 let mut best_trust = f64::NEG_INFINITY;
467 let mut rewrite_rule_id = String::from("stable_envelope");
468
469 for sample in &samples {
470 trust_sum += sample.trust;
471 residual_sum += sample.residual_summary;
472 observer_trust.insert(sample.observer_id, sample.trust);
473 if sample.trust > best_trust {
474 best_trust = sample.trust;
475 best_observer = sample.observer_id;
476 rewrite_rule_id = sample.rewrite_rule_label.to_string();
477 }
478 }
479
480 let sample_count = samples.len().max(1) as f64;
481 let avg_residual = residual_sum / sample_count;
482 let avg_trust = (trust_sum / sample_count).clamp(0.0, 1.0);
483 let residual_state = ResidualState::from_residual(avg_residual)
484 .as_str()
485 .to_string();
486
487 let echo_slope = aet.echo_slope[idx % echo_len];
488 let entropy_density = iwlt.entropy_density[idx % entropy_len];
489 let raw_structural = 0.5 * echo_slope + 0.5 * entropy_density;
490
491 let event = DscdEvent {
492 id: event_id,
493 time_index: idx as u64,
494 module_id: best_observer,
495 residual_state,
496 trust: avg_trust,
497 echo_slope,
498 entropy_density,
499 structural_level: 0.0,
500 };
501
502 aux_events.push(EventAux {
503 event,
504 rewrite_rule_id,
505 observer_trust,
506 });
507 raw_levels.push(raw_structural);
508 }
509
510 let (min_level, max_level) = raw_levels
511 .iter()
512 .fold((f64::INFINITY, f64::NEG_INFINITY), |(mn, mx), value| {
513 (mn.min(*value), mx.max(*value))
514 });
515 let level_span = (max_level - min_level).abs();
516
517 for (idx, aux) in aux_events.iter_mut().enumerate() {
518 aux.event.structural_level = if level_span <= f64::EPSILON {
519 0.5
520 } else {
521 ((raw_levels[idx] - min_level) / level_span).clamp(0.0, 1.0)
522 };
523 }
524
525 let candidate_edges = build_candidate_edges(&aux_events, params);
528 let events = aux_events.into_iter().map(|entry| entry.event).collect();
529 Ok(Dataset {
530 events,
531 candidate_edges,
532 })
533}
534
535fn build_candidate_edges(events: &[EventAux], params: &DscdParams) -> Vec<DscdEdge> {
536 if events.len() < 2 {
537 return Vec::new();
538 }
539
540 let mut edges = Vec::new();
541
542 for src_idx in 0..(events.len() - 1) {
543 let src = &events[src_idx];
544 let mut local_candidates = Vec::<CandidateEdgeDraft>::new();
545
546 let max_dst_idx = src_idx
547 .saturating_add(params.max_temporal_lag)
548 .min(events.len() - 1);
549
550 for dst_idx in (src_idx + 1)..=max_dst_idx {
551 let dst = &events[dst_idx];
552 let dt = dst_idx - src_idx;
553 if dt == 0 {
554 continue;
555 }
556
557 let d_struct = structural_distance(&src.event, &dst.event);
559 if d_struct > params.max_structural_radius {
560 continue;
561 }
562
563 let base_tau = dst
564 .observer_trust
565 .get(&dst.event.module_id)
566 .copied()
567 .unwrap_or(dst.event.trust);
568 let tau_eff = effective_trust(base_tau, dt, d_struct, params);
569 if !tau_eff.is_finite() || tau_eff <= 0.0 {
570 continue;
571 }
572
573 local_candidates.push(CandidateEdgeDraft {
574 src: src.event.id,
575 dst: dst.event.id,
576 module_id: dst.event.module_id,
577 rewrite_rule_id: src.rewrite_rule_id.clone(),
578 base_trust: base_tau,
579 effective_trust: tau_eff,
580 temporal_lag: dt,
581 structural_distance: d_struct,
582 structural_level_source: src.event.structural_level,
583 structural_level_target: dst.event.structural_level,
584 });
585 }
586
587 local_candidates.sort_by(|a, b| {
589 b.effective_trust
590 .total_cmp(&a.effective_trust)
591 .then(a.temporal_lag.cmp(&b.temporal_lag))
592 .then(a.structural_distance.total_cmp(&b.structural_distance))
593 .then(a.dst.cmp(&b.dst))
594 });
595
596 for draft in local_candidates.into_iter().take(params.max_out_edges) {
597 let edge_id = edges.len() as u64;
598 edges.push(DscdEdge {
599 edge_id,
600 src: draft.src,
601 dst: draft.dst,
602 module_id: draft.module_id,
603 rewrite_rule_id: Some(draft.rewrite_rule_id),
604 base_trust: draft.base_trust,
605 effective_trust: draft.effective_trust,
606 trust_at_creation: draft.effective_trust,
607 temporal_lag: draft.temporal_lag,
608 structural_distance: draft.structural_distance,
609 structural_level_source: draft.structural_level_source,
610 structural_level_target: draft.structural_level_target,
611 });
612 }
613 }
614
615 edges
616}
617
618fn compute_threshold_curve(
619 events: &[DscdEvent],
620 candidate_edges: &[DscdEdge],
621 taus: &[f64],
622 root_event_id: u64,
623) -> Vec<CurvePoint> {
624 taus.par_iter()
628 .map(|tau| {
629 let graph = build_graph_for_tau(events, candidate_edges, *tau);
630 CurvePoint {
631 tau: *tau,
632 expansion_ratio: expansion_ratio(&graph, root_event_id),
633 }
634 })
635 .collect()
636}
637
638fn compute_scaling_metrics(num_events: usize, curve: &[CurvePoint]) -> ScalingMetrics {
639 let max_rho = curve
640 .iter()
641 .map(|point| point.expansion_ratio)
642 .fold(0.0_f64, f64::max);
643 let high_target = 0.9 * max_rho;
644 let low_target = 0.1 * max_rho;
645 let critical_target = 0.5 * max_rho;
646
647 let tau_star = find_tau_at_or_below(curve, critical_target)
648 .unwrap_or_else(|| curve.last().map(|point| point.tau).unwrap_or_default());
649 let tau_high = find_tau_at_or_below(curve, high_target)
650 .unwrap_or_else(|| curve.first().map(|point| point.tau).unwrap_or_default());
651 let tau_low = find_tau_at_or_below(curve, low_target)
652 .unwrap_or_else(|| curve.last().map(|point| point.tau).unwrap_or(tau_high));
653
654 let max_derivative = curve
655 .windows(2)
656 .filter_map(|pair| {
657 let dt = pair[1].tau - pair[0].tau;
658 if dt.abs() <= f64::EPSILON {
659 return None;
660 }
661 Some(((pair[1].expansion_ratio - pair[0].expansion_ratio) / dt).abs())
662 })
663 .fold(0.0_f64, f64::max);
664
665 ScalingMetrics {
666 num_events,
667 tau_star,
668 transition_width: (tau_low - tau_high).max(0.0),
669 max_derivative,
670 }
671}
672
673fn find_tau_at_or_below(curve: &[CurvePoint], ratio_threshold: f64) -> Option<f64> {
674 if curve.is_empty() {
675 return None;
676 }
677 if curve[0].expansion_ratio <= ratio_threshold {
678 return Some(curve[0].tau);
679 }
680
681 for pair in curve.windows(2) {
682 let prev = &pair[0];
683 let curr = &pair[1];
684 if curr.expansion_ratio > ratio_threshold {
685 continue;
686 }
687 let drho = curr.expansion_ratio - prev.expansion_ratio;
688 if drho.abs() <= f64::EPSILON {
689 return Some(curr.tau);
690 }
691 let alpha = (ratio_threshold - prev.expansion_ratio) / drho;
692 return Some(prev.tau + alpha * (curr.tau - prev.tau));
693 }
694
695 None
696}
697
698fn write_threshold_curve_csv(path: &Path, curve: &[CurvePoint]) -> Result<()> {
699 let mut writer = Writer::from_path(path)?;
700 writer.write_record(["tau", "expansion_ratio"])?;
701 for point in curve {
702 writer.write_record([point.tau.to_string(), point.expansion_ratio.to_string()])?;
703 }
704 writer.flush()?;
705 Ok(())
706}
707
708fn write_threshold_scaling_summary_csv(path: &Path, rows: &[ScalingMetrics]) -> Result<()> {
709 let mut writer = Writer::from_path(path)?;
710 writer.write_record([
711 "num_events",
712 "tau_star",
713 "transition_width",
714 "max_derivative",
715 ])?;
716 for row in rows {
717 writer.write_record([
718 row.num_events.to_string(),
719 row.tau_star.to_string(),
720 row.transition_width.to_string(),
721 row.max_derivative.to_string(),
722 ])?;
723 }
724 writer.flush()?;
725 Ok(())
726}
727
728fn write_finite_size_scaling_csv(path: &Path, rows: &[ScalingMetrics]) -> Result<()> {
729 let mut writer = Writer::from_path(path)?;
730 writer.write_record([
731 "num_events",
732 "tau_star",
733 "transition_width",
734 "max_derivative",
735 "width_0_1_to_0_9",
736 ])?;
737 for row in rows {
738 writer.write_record([
739 row.num_events.to_string(),
740 row.tau_star.to_string(),
741 row.transition_width.to_string(),
742 row.max_derivative.to_string(),
743 row.transition_width.to_string(),
744 ])?;
745 }
746 writer.flush()?;
747 Ok(())
748}
749
750fn write_graph_events_csv(path: &Path, events: &[DscdEvent]) -> Result<()> {
751 let mut writer = Writer::from_path(path)?;
752 writer.write_record([
753 "id",
754 "time_index",
755 "module_id",
756 "residual_state",
757 "trust",
758 "echo_slope",
759 "entropy_density",
760 "structural_level",
761 ])?;
762 for event in events {
763 writer.write_record([
764 event.id.to_string(),
765 event.time_index.to_string(),
766 event.module_id.to_string(),
767 event.residual_state.clone(),
768 event.trust.to_string(),
769 event.echo_slope.to_string(),
770 event.entropy_density.to_string(),
771 event.structural_level.to_string(),
772 ])?;
773 }
774 writer.flush()?;
775 Ok(())
776}
777
778fn write_graph_edges_csv(path: &Path, edges: &[DscdEdge]) -> Result<()> {
779 let mut writer = Writer::from_path(path)?;
780 writer.write_record([
781 "edge_id",
782 "src",
783 "dst",
784 "module_id",
785 "rewrite_rule_id",
786 "base_trust",
787 "effective_trust",
788 "trust_at_creation",
789 "temporal_lag",
790 "structural_distance",
791 "structural_level_source",
792 "structural_level_target",
793 ])?;
794 for edge in edges {
795 writer.write_record([
796 edge.edge_id.to_string(),
797 edge.src.to_string(),
798 edge.dst.to_string(),
799 edge.module_id.to_string(),
800 edge.rewrite_rule_id.clone().unwrap_or_default(),
801 edge.base_trust.to_string(),
802 edge.effective_trust.to_string(),
803 edge.trust_at_creation.to_string(),
804 edge.temporal_lag.to_string(),
805 edge.structural_distance.to_string(),
806 edge.structural_level_source.to_string(),
807 edge.structural_level_target.to_string(),
808 ])?;
809 }
810 writer.flush()?;
811 Ok(())
812}
813
814fn write_degree_distribution_csv(path: &Path, graph: &DscdGraph) -> Result<()> {
815 let mut in_degree: HashMap<u64, usize> =
816 graph.events.iter().map(|event| (event.id, 0)).collect();
817 let mut out_degree: HashMap<u64, usize> =
818 graph.events.iter().map(|event| (event.id, 0)).collect();
819
820 for edge in &graph.edges {
821 *out_degree.entry(edge.src).or_insert(0) += 1;
822 *in_degree.entry(edge.dst).or_insert(0) += 1;
823 }
824
825 let mut writer = Writer::from_path(path)?;
826 writer.write_record(["event_id", "in_degree", "out_degree"])?;
827 for event in &graph.events {
828 writer.write_record([
829 event.id.to_string(),
830 in_degree.get(&event.id).copied().unwrap_or(0).to_string(),
831 out_degree.get(&event.id).copied().unwrap_or(0).to_string(),
832 ])?;
833 }
834 writer.flush()?;
835 Ok(())
836}
837
838fn write_interval_sizes_csv(path: &Path, graph: &DscdGraph, sample_count: usize) -> Result<()> {
839 let mut ids_by_time: Vec<(u64, u64)> = graph
840 .events
841 .iter()
842 .map(|event| (event.time_index, event.id))
843 .collect();
844 ids_by_time.sort_unstable();
845 let ordered_ids: Vec<u64> = ids_by_time.into_iter().map(|(_, id)| id).collect();
846
847 let mut adjacency: HashMap<u64, Vec<u64>> = HashMap::new();
848 let mut reverse_adjacency: HashMap<u64, Vec<u64>> = HashMap::new();
849 for edge in &graph.edges {
850 adjacency.entry(edge.src).or_default().push(edge.dst);
851 reverse_adjacency
852 .entry(edge.dst)
853 .or_default()
854 .push(edge.src);
855 }
856
857 let pairs = deterministic_pairs(&ordered_ids, sample_count);
858 let mut forward_cache: HashMap<u64, HashSet<u64>> = HashMap::new();
859 let mut reverse_cache: HashMap<u64, HashSet<u64>> = HashMap::new();
860
861 let mut writer = Writer::from_path(path)?;
862 writer.write_record(["src", "dst", "interval_size"])?;
863 for (src, dst) in pairs {
864 let forward = reachability_set(src, &adjacency, &mut forward_cache);
865 let interval_size = if !forward.contains(&dst) {
866 0
867 } else {
868 let backward = reachability_set(dst, &reverse_adjacency, &mut reverse_cache);
869 forward
870 .intersection(&backward)
871 .filter(|node| **node != src && **node != dst)
872 .count()
873 };
874
875 writer.write_record([src.to_string(), dst.to_string(), interval_size.to_string()])?;
876 }
877 writer.flush()?;
878 Ok(())
879}
880
881fn reachability_set(
882 start: u64,
883 adjacency: &HashMap<u64, Vec<u64>>,
884 cache: &mut HashMap<u64, HashSet<u64>>,
885) -> HashSet<u64> {
886 if let Some(cached) = cache.get(&start) {
887 return cached.clone();
888 }
889
890 let mut queue = VecDeque::from([start]);
891 let mut visited = HashSet::from([start]);
892 while let Some(current) = queue.pop_front() {
893 if let Some(neighbors) = adjacency.get(¤t) {
894 for next in neighbors {
895 if visited.insert(*next) {
896 queue.push_back(*next);
897 }
898 }
899 }
900 }
901
902 cache.insert(start, visited.clone());
903 visited
904}
905
906fn deterministic_pairs(ordered_ids: &[u64], sample_count: usize) -> Vec<(u64, u64)> {
907 if ordered_ids.len() < 2 || sample_count == 0 {
908 return Vec::new();
909 }
910
911 let n = ordered_ids.len();
912 let max_pairs = n.saturating_mul(n.saturating_sub(1)) / 2;
913 let target = sample_count.min(max_pairs);
914
915 let mut state = 0x9E37_79B9_7F4A_7C15_u64;
916 let mut seen = BTreeSet::new();
917 let mut pairs = Vec::with_capacity(target);
918 let mut attempts = 0_usize;
919 let max_attempts = target.saturating_mul(20).max(128);
920
921 while pairs.len() < target && attempts < max_attempts {
922 attempts += 1;
923 state = state
924 .wrapping_mul(6_364_136_223_846_793_005)
925 .wrapping_add(1);
926 let i = (state as usize) % (n - 1);
927 state = state
928 .wrapping_mul(6_364_136_223_846_793_005)
929 .wrapping_add(1);
930 let gap = ((state as usize) % (n - i - 1)) + 1;
931 let j = i + gap;
932 if seen.insert((i, j)) {
933 pairs.push((ordered_ids[i], ordered_ids[j]));
934 }
935 }
936
937 if pairs.len() < target {
938 for i in 0..(n - 1) {
939 for j in (i + 1)..n {
940 if seen.insert((i, j)) {
941 pairs.push((ordered_ids[i], ordered_ids[j]));
942 if pairs.len() == target {
943 return pairs;
944 }
945 }
946 }
947 }
948 }
949
950 pairs
951}
952
953fn write_path_lengths_csv(path: &Path, graph: &DscdGraph, root_event_id: u64) -> Result<()> {
954 let mut events = graph.events.clone();
955 events.sort_by_key(|event| event.time_index);
956
957 let mut adjacency: HashMap<u64, Vec<u64>> = HashMap::new();
958 for edge in &graph.edges {
959 adjacency.entry(edge.src).or_default().push(edge.dst);
960 }
961
962 let mut distances: HashMap<u64, Option<usize>> =
963 events.iter().map(|event| (event.id, None)).collect();
964 if distances.contains_key(&root_event_id) {
965 distances.insert(root_event_id, Some(0));
966 }
967
968 for event in &events {
969 let Some(Some(dist)) = distances.get(&event.id).copied() else {
970 continue;
971 };
972
973 if let Some(neighbors) = adjacency.get(&event.id) {
974 for next in neighbors {
975 let candidate = dist + 1;
976 let current = distances.get(next).copied().flatten();
977 if current.map_or(true, |value| candidate > value) {
978 distances.insert(*next, Some(candidate));
979 }
980 }
981 }
982 }
983
984 let mut writer = Writer::from_path(path)?;
985 writer.write_record(["event_id", "longest_path_length"])?;
986 for event in &events {
987 writer.write_record([
988 event.id.to_string(),
989 distances
990 .get(&event.id)
991 .copied()
992 .flatten()
993 .unwrap_or(0)
994 .to_string(),
995 ])?;
996 }
997 writer.flush()?;
998 Ok(())
999}
1000
1001fn write_edge_provenance_csv(
1002 path: &Path,
1003 graph: &DscdGraph,
1004 tau_star: f64,
1005 limit: usize,
1006) -> Result<()> {
1007 let event_by_id: HashMap<u64, &DscdEvent> =
1008 graph.events.iter().map(|event| (event.id, event)).collect();
1009 let mut writer = Writer::from_path(path)?;
1010 writer.write_record([
1011 "edge_id",
1012 "source_event",
1013 "target_event",
1014 "tau_star",
1015 "base_trust",
1016 "effective_trust",
1017 "dt",
1018 "d_struct",
1019 "structural_level_source",
1020 "structural_level_target",
1021 "module_id",
1022 "rewrite_rule_id",
1023 "residual_state_source",
1024 "residual_state_target",
1025 ])?;
1026
1027 for edge in graph.edges.iter().take(limit) {
1028 let Some(src) = event_by_id.get(&edge.src) else {
1029 continue;
1030 };
1031 let Some(dst) = event_by_id.get(&edge.dst) else {
1032 continue;
1033 };
1034 writer.write_record([
1035 edge.edge_id.to_string(),
1036 edge.src.to_string(),
1037 edge.dst.to_string(),
1038 tau_star.to_string(),
1039 edge.base_trust.to_string(),
1040 edge.effective_trust.to_string(),
1041 edge.temporal_lag.to_string(),
1042 edge.structural_distance.to_string(),
1043 edge.structural_level_source.to_string(),
1044 edge.structural_level_target.to_string(),
1045 edge.module_id.to_string(),
1046 edge.rewrite_rule_id.clone().unwrap_or_default(),
1047 src.residual_state.clone(),
1048 dst.residual_state.clone(),
1049 ])?;
1050 }
1051 writer.flush()?;
1052 Ok(())
1053}
1054
1055pub fn create_dscd_run_dir(output_root: &Path) -> Result<PathBuf> {
1057 fs::create_dir_all(output_root).with_context(|| {
1058 format!(
1059 "failed to create DSCD output root {}",
1060 output_root.display()
1061 )
1062 })?;
1063
1064 let stamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();
1065
1066 let mut run_dir = output_root.join(&stamp);
1067 let mut suffix = 1_usize;
1068 while run_dir.exists() {
1069 run_dir = output_root.join(format!("{stamp}_{suffix:02}"));
1070 suffix += 1;
1071 }
1072
1073 fs::create_dir_all(&run_dir)
1074 .with_context(|| format!("failed to create {}", run_dir.display()))?;
1075 Ok(run_dir)
1076}
1077
1078#[cfg(test)]
1079mod tests {
1080 use super::*;
1081 use std::time::{SystemTime, UNIX_EPOCH};
1082
1083 fn toy_events() -> Vec<DscdEvent> {
1084 (0..8_u64)
1085 .map(|idx| DscdEvent {
1086 id: idx,
1087 time_index: idx,
1088 module_id: (idx % 2) as u32,
1089 residual_state: "M".to_string(),
1090 trust: 0.9 - 0.05 * idx as f64,
1091 echo_slope: idx as f64 * 0.1,
1092 entropy_density: idx as f64 * 0.1,
1093 structural_level: idx as f64 * 0.05,
1094 })
1095 .collect()
1096 }
1097
1098 fn toy_candidates(events: &[DscdEvent], params: &DscdParams) -> Vec<DscdEdge> {
1099 let aux: Vec<EventAux> = events
1100 .iter()
1101 .map(|event| {
1102 let mut observer_trust = HashMap::new();
1103 observer_trust.insert(event.module_id, event.trust);
1104 EventAux {
1105 event: event.clone(),
1106 rewrite_rule_id: "stable_envelope".to_string(),
1107 observer_trust,
1108 }
1109 })
1110 .collect();
1111 build_candidate_edges(&aux, params)
1112 }
1113
1114 #[test]
1115 fn trust_gate_respects_tau_and_time_order() {
1116 let events = vec![
1117 DscdEvent {
1118 id: 0,
1119 time_index: 0,
1120 module_id: 0,
1121 residual_state: "L".to_string(),
1122 trust: 0.9,
1123 echo_slope: 0.1,
1124 entropy_density: 0.2,
1125 structural_level: 0.1,
1126 },
1127 DscdEvent {
1128 id: 1,
1129 time_index: 1,
1130 module_id: 0,
1131 residual_state: "M".to_string(),
1132 trust: 0.7,
1133 echo_slope: 0.2,
1134 entropy_density: 0.3,
1135 structural_level: 0.2,
1136 },
1137 ];
1138 let edges = vec![
1139 DscdEdge {
1140 edge_id: 0,
1141 src: 0,
1142 dst: 1,
1143 module_id: 0,
1144 rewrite_rule_id: Some("rule_a".to_string()),
1145 base_trust: 0.8,
1146 effective_trust: 0.8,
1147 trust_at_creation: 0.8,
1148 temporal_lag: 1,
1149 structural_distance: 0.1,
1150 structural_level_source: 0.1,
1151 structural_level_target: 0.2,
1152 },
1153 DscdEdge {
1154 edge_id: 1,
1155 src: 1,
1156 dst: 0,
1157 module_id: 0,
1158 rewrite_rule_id: Some("rule_b".to_string()),
1159 base_trust: 0.9,
1160 effective_trust: 0.9,
1161 trust_at_creation: 0.9,
1162 temporal_lag: 1,
1163 structural_distance: 0.1,
1164 structural_level_source: 0.2,
1165 structural_level_target: 0.1,
1166 },
1167 ];
1168
1169 let graph = build_graph_for_tau(&events, &edges, 0.75);
1170 assert_eq!(graph.edges.len(), 1);
1171 assert_eq!(graph.edges[0].src, 0);
1172 assert_eq!(graph.edges[0].dst, 1);
1173 }
1174
1175 #[test]
1176 fn local_rule_enforces_lag_radius_and_outdegree() {
1177 let params = DscdParams {
1178 num_events: 128,
1179 tau_min: 0.0,
1180 tau_max: 1.0,
1181 tau_steps: 8,
1182 max_temporal_lag: 2,
1183 max_structural_radius: 0.08,
1184 temporal_decay_gamma: 0.0,
1185 structural_decay_beta: 2.0,
1186 max_out_edges: 2,
1187 };
1188 let events = toy_events();
1189 let candidates = toy_candidates(&events, ¶ms);
1190
1191 let mut out_degree = HashMap::<u64, usize>::new();
1192 for edge in &candidates {
1193 let src_event = events
1194 .iter()
1195 .find(|event| event.id == edge.src)
1196 .expect("src");
1197 let dst_event = events
1198 .iter()
1199 .find(|event| event.id == edge.dst)
1200 .expect("dst");
1201
1202 let lag = dst_event.time_index - src_event.time_index;
1203 assert!(lag as usize <= params.max_temporal_lag);
1204
1205 let d_struct = structural_distance(src_event, dst_event);
1206 assert!(d_struct <= params.max_structural_radius + 1.0e-12);
1207
1208 *out_degree.entry(edge.src).or_insert(0) += 1;
1209 }
1210
1211 assert!(out_degree
1212 .values()
1213 .all(|degree| *degree <= params.max_out_edges));
1214 }
1215
1216 #[test]
1217 fn threshold_monotonicity_holds() {
1218 let params = DscdParams {
1219 num_events: 128,
1220 tau_min: 0.0,
1221 tau_max: 1.0,
1222 tau_steps: 8,
1223 max_temporal_lag: 4,
1224 max_structural_radius: 1.0,
1225 temporal_decay_gamma: 0.01,
1226 structural_decay_beta: 2.0,
1227 max_out_edges: 3,
1228 };
1229 let events = toy_events();
1230 let candidates = toy_candidates(&events, ¶ms);
1231
1232 let tau_low = 0.2;
1233 let tau_high = 0.6;
1234 let graph_low = build_graph_for_tau(&events, &candidates, tau_low);
1235 let graph_high = build_graph_for_tau(&events, &candidates, tau_high);
1236
1237 let low_set: HashSet<(u64, u64)> = graph_low
1238 .edges
1239 .iter()
1240 .map(|edge| (edge.src, edge.dst))
1241 .collect();
1242 for edge in &graph_high.edges {
1243 assert!(low_set.contains(&(edge.src, edge.dst)));
1244 }
1245 }
1246
1247 #[test]
1248 fn tiny_scaling_run_writes_required_outputs() {
1249 let out_dir = std::env::temp_dir().join(format!(
1250 "dsfb-dscd-sim-test-{}",
1251 SystemTime::now()
1252 .duration_since(UNIX_EPOCH)
1253 .expect("clock")
1254 .as_nanos()
1255 ));
1256
1257 let cfg = DscdConfig {
1258 params: DscdParams {
1259 num_events: 128,
1260 tau_min: 0.0,
1261 tau_max: 1.0,
1262 tau_steps: 3,
1263 max_temporal_lag: 8,
1264 max_structural_radius: 1.0,
1265 temporal_decay_gamma: 0.0,
1266 structural_decay_beta: 1.0,
1267 max_out_edges: 4,
1268 },
1269 root_event_id: 0,
1270 output_dir: out_dir.clone(),
1271 scaling_ns: vec![64, 128],
1272 };
1273 run_dscd_simulation(&cfg).expect("simulation should run");
1274
1275 assert!(out_dir.join("threshold_scaling_summary.csv").exists());
1276 assert!(out_dir.join("finite_size_scaling.csv").exists());
1277 assert!(out_dir.join("threshold_curve_N_64.csv").exists());
1278 assert!(out_dir.join("threshold_curve_N_128.csv").exists());
1279 assert!(out_dir.join("graph_events.csv").exists());
1280 assert!(out_dir.join("graph_edges.csv").exists());
1281 assert!(out_dir.join("edge_provenance_0.csv").exists());
1282
1283 let mut reader = csv::Reader::from_path(out_dir.join("threshold_scaling_summary.csv"))
1284 .expect("summary csv");
1285 for row in reader.records() {
1286 let row = row.expect("csv row");
1287 let tau_star: f64 = row.get(1).expect("tau_star").parse().expect("parse tau");
1288 let width: f64 = row.get(2).expect("width").parse().expect("parse width");
1289 assert!((0.0..=1.0).contains(&tau_star));
1290 assert!(width >= 0.0);
1291 }
1292
1293 let _ = fs::remove_dir_all(out_dir);
1294 }
1295}