Skip to main content

dsfb_dscd/
paper.rs

1//! DSCD paper-facing deterministic simulation and export pipeline.
2//!
3//! This module layers Deterministic Structural Causal Dynamics (DSCD) on top
4//! of DSFB + ADD. The edge rule is explicitly local and deterministic:
5//! temporal span bounds, structural radius bounds, and trust decay over
6//! temporal/structural distance. This supports certification-friendly graph
7//! complexity and finite-size threshold analysis in the DSCD paper.
8
9use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
10use std::fs;
11use std::path::{Path, PathBuf};
12
13use anyhow::{anyhow, bail, Context, Result};
14use csv::Writer;
15use dsfb::sim::SimConfig;
16use dsfb::DsfbParams;
17use dsfb_add::aet::run_aet_sweep;
18use dsfb_add::iwlt::run_iwlt_sweep;
19use dsfb_add::SimulationConfig as AddSimulationConfig;
20use rayon::prelude::*;
21use serde::{Deserialize, Serialize};
22
23use crate::integrations::{generate_dscd_events_from_dsfb, DscdObserverSample, ResidualState};
24
25/// DSCD locality and threshold parameters.
26///
27/// These parameters are deterministic and directly control the local-causal
28/// edge rule used for DSCD graph construction:
29/// - Locality (temporal + structural) supports interval finiteness.
30/// - Distance-based trust decay supports nontrivial threshold transition shape.
31/// - Out-degree bound keeps complexity certification-friendly.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct DscdParams {
34    pub num_events: usize,
35    pub tau_min: f64,
36    pub tau_max: f64,
37    pub tau_steps: usize,
38    pub max_temporal_lag: usize,
39    pub max_structural_radius: f64,
40    pub temporal_decay_gamma: f64,
41    pub structural_decay_beta: f64,
42    pub max_out_edges: usize,
43}
44
45impl DscdParams {
46    /// Demo preset tuned for free Colab runtimes.
47    pub fn demo() -> Self {
48        Self {
49            num_events: 10_000,
50            tau_min: 0.0,
51            tau_max: 1.0,
52            tau_steps: 64,
53            max_temporal_lag: 64,
54            max_structural_radius: 0.1,
55            temporal_decay_gamma: 0.0,
56            structural_decay_beta: 2.0,
57            max_out_edges: 8,
58        }
59    }
60
61    /// Performance preset for workstation runs.
62    pub fn performance() -> Self {
63        Self {
64            num_events: 100_000,
65            tau_min: 0.0,
66            tau_max: 1.0,
67            tau_steps: 256,
68            max_temporal_lag: 64,
69            max_structural_radius: 0.1,
70            temporal_decay_gamma: 0.0,
71            structural_decay_beta: 2.0,
72            max_out_edges: 8,
73        }
74    }
75
76    pub fn tau_grid(&self) -> Vec<f64> {
77        linspace(self.tau_min, self.tau_max, self.tau_steps)
78    }
79}
80
81impl Default for DscdParams {
82    fn default() -> Self {
83        Self::demo()
84    }
85}
86
87/// Runtime configuration for DSCD simulation + scaling outputs.
88#[derive(Debug, Clone)]
89pub struct DscdConfig {
90    pub params: DscdParams,
91    pub root_event_id: u64,
92    pub output_dir: PathBuf,
93    pub scaling_ns: Vec<usize>,
94}
95
96impl Default for DscdConfig {
97    fn default() -> Self {
98        Self {
99            params: DscdParams::default(),
100            root_event_id: 0,
101            output_dir: PathBuf::from("output-dsfb-dscd"),
102            scaling_ns: vec![5_000, 10_000, 20_000, 50_000, 100_000],
103        }
104    }
105}
106
107/// DSCD event node exported for causal graph and scaling analyses.
108///
109/// EVENT TYPE NOTE:
110/// This is the canonical event struct written to `graph_events.csv`. It
111/// carries event/time indices, trust, and ADD-derived structure.
112#[derive(Debug, Clone, Serialize)]
113pub struct DscdEvent {
114    pub id: u64,
115    pub time_index: u64,
116    pub module_id: u32,
117    pub residual_state: String,
118    pub trust: f64,
119    pub echo_slope: f64,
120    pub entropy_density: f64,
121    pub structural_level: f64,
122}
123
124/// DSCD directed edge candidate with locality + trust-decay provenance.
125#[derive(Debug, Clone, Serialize)]
126pub struct DscdEdge {
127    pub edge_id: u64,
128    pub src: u64,
129    pub dst: u64,
130    pub module_id: u32,
131    pub rewrite_rule_id: Option<String>,
132    pub base_trust: f64,
133    pub effective_trust: f64,
134    pub trust_at_creation: f64,
135    pub temporal_lag: usize,
136    pub structural_distance: f64,
137    pub structural_level_source: f64,
138    pub structural_level_target: f64,
139}
140
141/// Trust-gated DSCD graph at a fixed threshold.
142#[derive(Debug, Clone, Default)]
143pub struct DscdGraph {
144    pub events: Vec<DscdEvent>,
145    pub edges: Vec<DscdEdge>,
146}
147
148/// DSCD error type alias.
149pub type DscdError = anyhow::Error;
150
151#[derive(Debug, Clone)]
152struct CurvePoint {
153    tau: f64,
154    expansion_ratio: f64,
155}
156
157#[derive(Debug, Clone)]
158struct ScalingMetrics {
159    num_events: usize,
160    tau_star: f64,
161    transition_width: f64,
162    max_derivative: f64,
163}
164
165#[derive(Debug, Clone)]
166struct EventAux {
167    event: DscdEvent,
168    rewrite_rule_id: String,
169    observer_trust: HashMap<u32, f64>,
170}
171
172#[derive(Debug, Clone)]
173struct Dataset {
174    events: Vec<DscdEvent>,
175    candidate_edges: Vec<DscdEdge>,
176}
177
178#[derive(Debug, Clone)]
179struct CandidateEdgeDraft {
180    src: u64,
181    dst: u64,
182    module_id: u32,
183    rewrite_rule_id: String,
184    base_trust: f64,
185    effective_trust: f64,
186    temporal_lag: usize,
187    structural_distance: f64,
188    structural_level_source: f64,
189    structural_level_target: f64,
190}
191
192/// Deterministic structural distance between two events.
193pub fn structural_distance(e_i: &DscdEvent, e_j: &DscdEvent) -> f64 {
194    (e_j.structural_level - e_i.structural_level).abs()
195}
196
197/// Deterministic trust attenuation over temporal + structural separation.
198pub fn effective_trust(base_tau: f64, dt: usize, d_struct: f64, params: &DscdParams) -> f64 {
199    let dt_f = dt as f64;
200    let decay_temporal = (-params.temporal_decay_gamma * dt_f).exp();
201    let decay_struct = (-params.structural_decay_beta * d_struct).exp();
202    base_tau * decay_temporal * decay_struct
203}
204
205/// Build thresholded graph `G_tau` from precomputed local candidates.
206///
207/// EDGE SWEEP NOTE:
208/// This is the threshold gate used by the tau-sweep writer. Monotonicity holds:
209/// increasing `tau` can only remove edges.
210pub fn build_graph_for_tau(
211    events: &[DscdEvent],
212    candidate_edges: &[DscdEdge],
213    tau: f64,
214) -> DscdGraph {
215    let time_by_id: HashMap<u64, u64> = events
216        .iter()
217        .map(|event| (event.id, event.time_index))
218        .collect();
219
220    let mut edges = Vec::new();
221    for edge in candidate_edges {
222        if edge.effective_trust < tau {
223            continue;
224        }
225
226        let Some(&src_time) = time_by_id.get(&edge.src) else {
227            continue;
228        };
229        let Some(&dst_time) = time_by_id.get(&edge.dst) else {
230            continue;
231        };
232
233        // Enforce acyclic time ordering.
234        if src_time < dst_time {
235            edges.push(edge.clone());
236        }
237    }
238
239    DscdGraph {
240        events: events.to_vec(),
241        edges,
242    }
243}
244
245/// Compute reachable component size from `root_id` using deterministic BFS.
246pub fn compute_reachable_component_size(graph: &DscdGraph, root_id: u64) -> usize {
247    if graph.events.is_empty() || !graph.events.iter().any(|event| event.id == root_id) {
248        return 0;
249    }
250
251    let mut adjacency: HashMap<u64, Vec<u64>> = HashMap::new();
252    for edge in &graph.edges {
253        adjacency.entry(edge.src).or_default().push(edge.dst);
254    }
255    for neighbors in adjacency.values_mut() {
256        neighbors.sort_unstable();
257    }
258
259    let mut queue = VecDeque::from([root_id]);
260    let mut visited = HashSet::from([root_id]);
261    while let Some(current) = queue.pop_front() {
262        if let Some(neighbors) = adjacency.get(&current) {
263            for next in neighbors {
264                if visited.insert(*next) {
265                    queue.push_back(*next);
266                }
267            }
268        }
269    }
270
271    visited.len()
272}
273
274/// Reachable fraction for the root component in the trust-gated graph.
275pub fn expansion_ratio(graph: &DscdGraph, root_id: u64) -> f64 {
276    if graph.events.is_empty() {
277        return 0.0;
278    }
279    compute_reachable_component_size(graph, root_id) as f64 / graph.events.len() as f64
280}
281
282/// Run deterministic DSCD simulation, threshold sweep, and scaling exports.
283pub fn run_dscd_simulation(cfg: &DscdConfig) -> std::result::Result<(), DscdError> {
284    validate_config(cfg)?;
285    fs::create_dir_all(&cfg.output_dir).with_context(|| {
286        format!(
287            "failed to create DSCD output directory {}",
288            cfg.output_dir.display()
289        )
290    })?;
291
292    let mut run_ns = cfg.scaling_ns.clone();
293    run_ns.push(cfg.params.num_events);
294    run_ns.sort_unstable();
295    run_ns.dedup();
296
297    let max_n = run_ns
298        .iter()
299        .copied()
300        .max()
301        .ok_or_else(|| anyhow!("no event counts configured"))?;
302    let dataset = generate_dataset(max_n, &cfg.params)?;
303    let taus = cfg.params.tau_grid();
304
305    let mut summary_rows = Vec::new();
306    let mut curve_by_n: HashMap<usize, Vec<CurvePoint>> = HashMap::new();
307    let mut main_events = Vec::new();
308    let mut main_edges = Vec::new();
309
310    for n in run_ns {
311        let events = dataset.events[..n].to_vec();
312        let candidate_edges: Vec<DscdEdge> = dataset
313            .candidate_edges
314            .iter()
315            .filter(|edge| edge.src < n as u64 && edge.dst < n as u64)
316            .cloned()
317            .collect();
318
319        let curve = compute_threshold_curve(&events, &candidate_edges, &taus, cfg.root_event_id);
320        write_threshold_curve_csv(
321            &cfg.output_dir.join(format!("threshold_curve_N_{n}.csv")),
322            &curve,
323        )?;
324
325        let metrics = compute_scaling_metrics(n, &curve);
326        summary_rows.push(metrics.clone());
327        curve_by_n.insert(n, curve);
328
329        if n == cfg.params.num_events {
330            main_events = events;
331            main_edges = candidate_edges;
332        }
333    }
334
335    write_threshold_scaling_summary_csv(
336        &cfg.output_dir.join("threshold_scaling_summary.csv"),
337        &summary_rows,
338    )?;
339    write_finite_size_scaling_csv(
340        &cfg.output_dir.join("finite_size_scaling.csv"),
341        &summary_rows,
342    )?;
343
344    let Some(main_curve) = curve_by_n.get(&cfg.params.num_events) else {
345        bail!(
346            "missing threshold curve for main num_events={}",
347            cfg.params.num_events
348        );
349    };
350    let main_metrics = compute_scaling_metrics(cfg.params.num_events, main_curve);
351    let main_tau = main_metrics.tau_star;
352
353    write_graph_events_csv(&cfg.output_dir.join("graph_events.csv"), &main_events)?;
354    let final_graph = build_graph_for_tau(&main_events, &main_edges, main_tau);
355    write_graph_edges_csv(&cfg.output_dir.join("graph_edges.csv"), &final_graph.edges)?;
356    write_degree_distribution_csv(
357        &cfg.output_dir.join("degree_distribution.csv"),
358        &final_graph,
359    )?;
360    write_interval_sizes_csv(
361        &cfg.output_dir.join("interval_sizes.csv"),
362        &final_graph,
363        1_024,
364    )?;
365    write_path_lengths_csv(
366        &cfg.output_dir.join("path_lengths.csv"),
367        &final_graph,
368        cfg.root_event_id,
369    )?;
370
371    let provenance_path = cfg.output_dir.join("edge_provenance_0.csv");
372    write_edge_provenance_csv(&provenance_path, &final_graph, main_tau, 10)?;
373    let compatibility_path = cfg.output_dir.join("edge_provenance.csv");
374    write_edge_provenance_csv(&compatibility_path, &final_graph, main_tau, 10)?;
375
376    Ok(())
377}
378
379/// Build evenly spaced deterministic threshold grid.
380pub fn linspace(start: f64, end: f64, count: usize) -> Vec<f64> {
381    if count <= 1 {
382        return vec![start];
383    }
384    let step = (end - start) / (count.saturating_sub(1) as f64);
385    (0..count).map(|idx| start + step * idx as f64).collect()
386}
387
388fn validate_config(cfg: &DscdConfig) -> Result<()> {
389    let params = &cfg.params;
390    if params.num_events == 0 {
391        bail!("num_events must be greater than zero");
392    }
393    if params.tau_steps < 2 {
394        bail!("tau_steps must be at least 2");
395    }
396    if !params.tau_min.is_finite() || !params.tau_max.is_finite() {
397        bail!("tau bounds must be finite");
398    }
399    if params.tau_max < params.tau_min {
400        bail!("tau_max must be >= tau_min");
401    }
402    if params.max_temporal_lag == 0 {
403        bail!("max_temporal_lag must be > 0");
404    }
405    if !params.max_structural_radius.is_finite() || params.max_structural_radius <= 0.0 {
406        bail!("max_structural_radius must be > 0");
407    }
408    if !params.temporal_decay_gamma.is_finite() || params.temporal_decay_gamma < 0.0 {
409        bail!("temporal_decay_gamma must be >= 0");
410    }
411    if !params.structural_decay_beta.is_finite() || params.structural_decay_beta < 0.0 {
412        bail!("structural_decay_beta must be >= 0");
413    }
414    if params.max_out_edges == 0 {
415        bail!("max_out_edges must be > 0");
416    }
417    if cfg.scaling_ns.iter().any(|n| *n == 0) {
418        bail!("scaling_ns values must be greater than zero");
419    }
420    Ok(())
421}
422
423fn generate_dataset(max_events: usize, params: &DscdParams) -> Result<Dataset> {
424    let dsfb_cfg = SimConfig {
425        steps: max_events,
426        ..SimConfig::default()
427    };
428    let event_batch = generate_dscd_events_from_dsfb(&dsfb_cfg, DsfbParams::default(), max_events)?;
429
430    let mut add_cfg = AddSimulationConfig::default();
431    let add_steps = if max_events <= 20_000 {
432        max_events.min(2_048)
433    } else {
434        max_events.min(4_096)
435    }
436    .max(128);
437    add_cfg.steps_per_run = add_steps;
438    add_cfg.multi_steps_per_run = vec![add_steps];
439    let lambda_grid = add_cfg.lambda_grid();
440    let aet = run_aet_sweep(&add_cfg, &lambda_grid)
441        .map_err(|error| anyhow!("failed to compute AET sweep for DSCD: {error}"))?;
442    let iwlt = run_iwlt_sweep(&add_cfg, &lambda_grid)
443        .map_err(|error| anyhow!("failed to compute IWLT sweep for DSCD: {error}"))?;
444    let echo_len = aet.echo_slope.len().max(1);
445    let entropy_len = iwlt.entropy_density.len().max(1);
446
447    let mut samples_by_event: BTreeMap<u64, Vec<&DscdObserverSample>> = BTreeMap::new();
448    for sample in &event_batch.observer_samples {
449        samples_by_event
450            .entry(sample.event_id)
451            .or_default()
452            .push(sample);
453    }
454
455    let mut aux_events = Vec::with_capacity(event_batch.events.len());
456    let mut raw_levels = Vec::with_capacity(event_batch.events.len());
457
458    for (idx, event) in event_batch.events.iter().enumerate() {
459        let event_id = event.id.0;
460        let samples = samples_by_event.get(&event_id).cloned().unwrap_or_default();
461
462        let mut trust_sum = 0.0;
463        let mut residual_sum = 0.0;
464        let mut observer_trust = HashMap::new();
465        let mut best_observer = 0_u32;
466        let mut best_trust = f64::NEG_INFINITY;
467        let mut rewrite_rule_id = String::from("stable_envelope");
468
469        for sample in &samples {
470            trust_sum += sample.trust;
471            residual_sum += sample.residual_summary;
472            observer_trust.insert(sample.observer_id, sample.trust);
473            if sample.trust > best_trust {
474                best_trust = sample.trust;
475                best_observer = sample.observer_id;
476                rewrite_rule_id = sample.rewrite_rule_label.to_string();
477            }
478        }
479
480        let sample_count = samples.len().max(1) as f64;
481        let avg_residual = residual_sum / sample_count;
482        let avg_trust = (trust_sum / sample_count).clamp(0.0, 1.0);
483        let residual_state = ResidualState::from_residual(avg_residual)
484            .as_str()
485            .to_string();
486
487        let echo_slope = aet.echo_slope[idx % echo_len];
488        let entropy_density = iwlt.entropy_density[idx % entropy_len];
489        let raw_structural = 0.5 * echo_slope + 0.5 * entropy_density;
490
491        let event = DscdEvent {
492            id: event_id,
493            time_index: idx as u64,
494            module_id: best_observer,
495            residual_state,
496            trust: avg_trust,
497            echo_slope,
498            entropy_density,
499            structural_level: 0.0,
500        };
501
502        aux_events.push(EventAux {
503            event,
504            rewrite_rule_id,
505            observer_trust,
506        });
507        raw_levels.push(raw_structural);
508    }
509
510    let (min_level, max_level) = raw_levels
511        .iter()
512        .fold((f64::INFINITY, f64::NEG_INFINITY), |(mn, mx), value| {
513            (mn.min(*value), mx.max(*value))
514        });
515    let level_span = (max_level - min_level).abs();
516
517    for (idx, aux) in aux_events.iter_mut().enumerate() {
518        aux.event.structural_level = if level_span <= f64::EPSILON {
519            0.5
520        } else {
521            ((raw_levels[idx] - min_level) / level_span).clamp(0.0, 1.0)
522        };
523    }
524
525    // EDGE CONSTRUCTION NOTE:
526    // Local candidate generation + bounded out-degree happen here.
527    let candidate_edges = build_candidate_edges(&aux_events, params);
528    let events = aux_events.into_iter().map(|entry| entry.event).collect();
529    Ok(Dataset {
530        events,
531        candidate_edges,
532    })
533}
534
535fn build_candidate_edges(events: &[EventAux], params: &DscdParams) -> Vec<DscdEdge> {
536    if events.len() < 2 {
537        return Vec::new();
538    }
539
540    let mut edges = Vec::new();
541
542    for src_idx in 0..(events.len() - 1) {
543        let src = &events[src_idx];
544        let mut local_candidates = Vec::<CandidateEdgeDraft>::new();
545
546        let max_dst_idx = src_idx
547            .saturating_add(params.max_temporal_lag)
548            .min(events.len() - 1);
549
550        for dst_idx in (src_idx + 1)..=max_dst_idx {
551            let dst = &events[dst_idx];
552            let dt = dst_idx - src_idx;
553            if dt == 0 {
554                continue;
555            }
556
557            // Locality constraints: temporal lag + structural neighborhood.
558            let d_struct = structural_distance(&src.event, &dst.event);
559            if d_struct > params.max_structural_radius {
560                continue;
561            }
562
563            let base_tau = dst
564                .observer_trust
565                .get(&dst.event.module_id)
566                .copied()
567                .unwrap_or(dst.event.trust);
568            let tau_eff = effective_trust(base_tau, dt, d_struct, params);
569            if !tau_eff.is_finite() || tau_eff <= 0.0 {
570                continue;
571            }
572
573            local_candidates.push(CandidateEdgeDraft {
574                src: src.event.id,
575                dst: dst.event.id,
576                module_id: dst.event.module_id,
577                rewrite_rule_id: src.rewrite_rule_id.clone(),
578                base_trust: base_tau,
579                effective_trust: tau_eff,
580                temporal_lag: dt,
581                structural_distance: d_struct,
582                structural_level_source: src.event.structural_level,
583                structural_level_target: dst.event.structural_level,
584            });
585        }
586
587        // Bounded out-degree selection: best local edges first.
588        local_candidates.sort_by(|a, b| {
589            b.effective_trust
590                .total_cmp(&a.effective_trust)
591                .then(a.temporal_lag.cmp(&b.temporal_lag))
592                .then(a.structural_distance.total_cmp(&b.structural_distance))
593                .then(a.dst.cmp(&b.dst))
594        });
595
596        for draft in local_candidates.into_iter().take(params.max_out_edges) {
597            let edge_id = edges.len() as u64;
598            edges.push(DscdEdge {
599                edge_id,
600                src: draft.src,
601                dst: draft.dst,
602                module_id: draft.module_id,
603                rewrite_rule_id: Some(draft.rewrite_rule_id),
604                base_trust: draft.base_trust,
605                effective_trust: draft.effective_trust,
606                trust_at_creation: draft.effective_trust,
607                temporal_lag: draft.temporal_lag,
608                structural_distance: draft.structural_distance,
609                structural_level_source: draft.structural_level_source,
610                structural_level_target: draft.structural_level_target,
611            });
612        }
613    }
614
615    edges
616}
617
618fn compute_threshold_curve(
619    events: &[DscdEvent],
620    candidate_edges: &[DscdEdge],
621    taus: &[f64],
622    root_event_id: u64,
623) -> Vec<CurvePoint> {
624    // THRESHOLD SWEEP NOTE:
625    // This loop writes rho(tau) through repeated threshold gating of the same
626    // local candidate set.
627    taus.par_iter()
628        .map(|tau| {
629            let graph = build_graph_for_tau(events, candidate_edges, *tau);
630            CurvePoint {
631                tau: *tau,
632                expansion_ratio: expansion_ratio(&graph, root_event_id),
633            }
634        })
635        .collect()
636}
637
638fn compute_scaling_metrics(num_events: usize, curve: &[CurvePoint]) -> ScalingMetrics {
639    let max_rho = curve
640        .iter()
641        .map(|point| point.expansion_ratio)
642        .fold(0.0_f64, f64::max);
643    let high_target = 0.9 * max_rho;
644    let low_target = 0.1 * max_rho;
645    let critical_target = 0.5 * max_rho;
646
647    let tau_star = find_tau_at_or_below(curve, critical_target)
648        .unwrap_or_else(|| curve.last().map(|point| point.tau).unwrap_or_default());
649    let tau_high = find_tau_at_or_below(curve, high_target)
650        .unwrap_or_else(|| curve.first().map(|point| point.tau).unwrap_or_default());
651    let tau_low = find_tau_at_or_below(curve, low_target)
652        .unwrap_or_else(|| curve.last().map(|point| point.tau).unwrap_or(tau_high));
653
654    let max_derivative = curve
655        .windows(2)
656        .filter_map(|pair| {
657            let dt = pair[1].tau - pair[0].tau;
658            if dt.abs() <= f64::EPSILON {
659                return None;
660            }
661            Some(((pair[1].expansion_ratio - pair[0].expansion_ratio) / dt).abs())
662        })
663        .fold(0.0_f64, f64::max);
664
665    ScalingMetrics {
666        num_events,
667        tau_star,
668        transition_width: (tau_low - tau_high).max(0.0),
669        max_derivative,
670    }
671}
672
673fn find_tau_at_or_below(curve: &[CurvePoint], ratio_threshold: f64) -> Option<f64> {
674    if curve.is_empty() {
675        return None;
676    }
677    if curve[0].expansion_ratio <= ratio_threshold {
678        return Some(curve[0].tau);
679    }
680
681    for pair in curve.windows(2) {
682        let prev = &pair[0];
683        let curr = &pair[1];
684        if curr.expansion_ratio > ratio_threshold {
685            continue;
686        }
687        let drho = curr.expansion_ratio - prev.expansion_ratio;
688        if drho.abs() <= f64::EPSILON {
689            return Some(curr.tau);
690        }
691        let alpha = (ratio_threshold - prev.expansion_ratio) / drho;
692        return Some(prev.tau + alpha * (curr.tau - prev.tau));
693    }
694
695    None
696}
697
698fn write_threshold_curve_csv(path: &Path, curve: &[CurvePoint]) -> Result<()> {
699    let mut writer = Writer::from_path(path)?;
700    writer.write_record(["tau", "expansion_ratio"])?;
701    for point in curve {
702        writer.write_record([point.tau.to_string(), point.expansion_ratio.to_string()])?;
703    }
704    writer.flush()?;
705    Ok(())
706}
707
708fn write_threshold_scaling_summary_csv(path: &Path, rows: &[ScalingMetrics]) -> Result<()> {
709    let mut writer = Writer::from_path(path)?;
710    writer.write_record([
711        "num_events",
712        "tau_star",
713        "transition_width",
714        "max_derivative",
715    ])?;
716    for row in rows {
717        writer.write_record([
718            row.num_events.to_string(),
719            row.tau_star.to_string(),
720            row.transition_width.to_string(),
721            row.max_derivative.to_string(),
722        ])?;
723    }
724    writer.flush()?;
725    Ok(())
726}
727
728fn write_finite_size_scaling_csv(path: &Path, rows: &[ScalingMetrics]) -> Result<()> {
729    let mut writer = Writer::from_path(path)?;
730    writer.write_record([
731        "num_events",
732        "tau_star",
733        "transition_width",
734        "max_derivative",
735        "width_0_1_to_0_9",
736    ])?;
737    for row in rows {
738        writer.write_record([
739            row.num_events.to_string(),
740            row.tau_star.to_string(),
741            row.transition_width.to_string(),
742            row.max_derivative.to_string(),
743            row.transition_width.to_string(),
744        ])?;
745    }
746    writer.flush()?;
747    Ok(())
748}
749
750fn write_graph_events_csv(path: &Path, events: &[DscdEvent]) -> Result<()> {
751    let mut writer = Writer::from_path(path)?;
752    writer.write_record([
753        "id",
754        "time_index",
755        "module_id",
756        "residual_state",
757        "trust",
758        "echo_slope",
759        "entropy_density",
760        "structural_level",
761    ])?;
762    for event in events {
763        writer.write_record([
764            event.id.to_string(),
765            event.time_index.to_string(),
766            event.module_id.to_string(),
767            event.residual_state.clone(),
768            event.trust.to_string(),
769            event.echo_slope.to_string(),
770            event.entropy_density.to_string(),
771            event.structural_level.to_string(),
772        ])?;
773    }
774    writer.flush()?;
775    Ok(())
776}
777
778fn write_graph_edges_csv(path: &Path, edges: &[DscdEdge]) -> Result<()> {
779    let mut writer = Writer::from_path(path)?;
780    writer.write_record([
781        "edge_id",
782        "src",
783        "dst",
784        "module_id",
785        "rewrite_rule_id",
786        "base_trust",
787        "effective_trust",
788        "trust_at_creation",
789        "temporal_lag",
790        "structural_distance",
791        "structural_level_source",
792        "structural_level_target",
793    ])?;
794    for edge in edges {
795        writer.write_record([
796            edge.edge_id.to_string(),
797            edge.src.to_string(),
798            edge.dst.to_string(),
799            edge.module_id.to_string(),
800            edge.rewrite_rule_id.clone().unwrap_or_default(),
801            edge.base_trust.to_string(),
802            edge.effective_trust.to_string(),
803            edge.trust_at_creation.to_string(),
804            edge.temporal_lag.to_string(),
805            edge.structural_distance.to_string(),
806            edge.structural_level_source.to_string(),
807            edge.structural_level_target.to_string(),
808        ])?;
809    }
810    writer.flush()?;
811    Ok(())
812}
813
814fn write_degree_distribution_csv(path: &Path, graph: &DscdGraph) -> Result<()> {
815    let mut in_degree: HashMap<u64, usize> =
816        graph.events.iter().map(|event| (event.id, 0)).collect();
817    let mut out_degree: HashMap<u64, usize> =
818        graph.events.iter().map(|event| (event.id, 0)).collect();
819
820    for edge in &graph.edges {
821        *out_degree.entry(edge.src).or_insert(0) += 1;
822        *in_degree.entry(edge.dst).or_insert(0) += 1;
823    }
824
825    let mut writer = Writer::from_path(path)?;
826    writer.write_record(["event_id", "in_degree", "out_degree"])?;
827    for event in &graph.events {
828        writer.write_record([
829            event.id.to_string(),
830            in_degree.get(&event.id).copied().unwrap_or(0).to_string(),
831            out_degree.get(&event.id).copied().unwrap_or(0).to_string(),
832        ])?;
833    }
834    writer.flush()?;
835    Ok(())
836}
837
838fn write_interval_sizes_csv(path: &Path, graph: &DscdGraph, sample_count: usize) -> Result<()> {
839    let mut ids_by_time: Vec<(u64, u64)> = graph
840        .events
841        .iter()
842        .map(|event| (event.time_index, event.id))
843        .collect();
844    ids_by_time.sort_unstable();
845    let ordered_ids: Vec<u64> = ids_by_time.into_iter().map(|(_, id)| id).collect();
846
847    let mut adjacency: HashMap<u64, Vec<u64>> = HashMap::new();
848    let mut reverse_adjacency: HashMap<u64, Vec<u64>> = HashMap::new();
849    for edge in &graph.edges {
850        adjacency.entry(edge.src).or_default().push(edge.dst);
851        reverse_adjacency
852            .entry(edge.dst)
853            .or_default()
854            .push(edge.src);
855    }
856
857    let pairs = deterministic_pairs(&ordered_ids, sample_count);
858    let mut forward_cache: HashMap<u64, HashSet<u64>> = HashMap::new();
859    let mut reverse_cache: HashMap<u64, HashSet<u64>> = HashMap::new();
860
861    let mut writer = Writer::from_path(path)?;
862    writer.write_record(["src", "dst", "interval_size"])?;
863    for (src, dst) in pairs {
864        let forward = reachability_set(src, &adjacency, &mut forward_cache);
865        let interval_size = if !forward.contains(&dst) {
866            0
867        } else {
868            let backward = reachability_set(dst, &reverse_adjacency, &mut reverse_cache);
869            forward
870                .intersection(&backward)
871                .filter(|node| **node != src && **node != dst)
872                .count()
873        };
874
875        writer.write_record([src.to_string(), dst.to_string(), interval_size.to_string()])?;
876    }
877    writer.flush()?;
878    Ok(())
879}
880
881fn reachability_set(
882    start: u64,
883    adjacency: &HashMap<u64, Vec<u64>>,
884    cache: &mut HashMap<u64, HashSet<u64>>,
885) -> HashSet<u64> {
886    if let Some(cached) = cache.get(&start) {
887        return cached.clone();
888    }
889
890    let mut queue = VecDeque::from([start]);
891    let mut visited = HashSet::from([start]);
892    while let Some(current) = queue.pop_front() {
893        if let Some(neighbors) = adjacency.get(&current) {
894            for next in neighbors {
895                if visited.insert(*next) {
896                    queue.push_back(*next);
897                }
898            }
899        }
900    }
901
902    cache.insert(start, visited.clone());
903    visited
904}
905
906fn deterministic_pairs(ordered_ids: &[u64], sample_count: usize) -> Vec<(u64, u64)> {
907    if ordered_ids.len() < 2 || sample_count == 0 {
908        return Vec::new();
909    }
910
911    let n = ordered_ids.len();
912    let max_pairs = n.saturating_mul(n.saturating_sub(1)) / 2;
913    let target = sample_count.min(max_pairs);
914
915    let mut state = 0x9E37_79B9_7F4A_7C15_u64;
916    let mut seen = BTreeSet::new();
917    let mut pairs = Vec::with_capacity(target);
918    let mut attempts = 0_usize;
919    let max_attempts = target.saturating_mul(20).max(128);
920
921    while pairs.len() < target && attempts < max_attempts {
922        attempts += 1;
923        state = state
924            .wrapping_mul(6_364_136_223_846_793_005)
925            .wrapping_add(1);
926        let i = (state as usize) % (n - 1);
927        state = state
928            .wrapping_mul(6_364_136_223_846_793_005)
929            .wrapping_add(1);
930        let gap = ((state as usize) % (n - i - 1)) + 1;
931        let j = i + gap;
932        if seen.insert((i, j)) {
933            pairs.push((ordered_ids[i], ordered_ids[j]));
934        }
935    }
936
937    if pairs.len() < target {
938        for i in 0..(n - 1) {
939            for j in (i + 1)..n {
940                if seen.insert((i, j)) {
941                    pairs.push((ordered_ids[i], ordered_ids[j]));
942                    if pairs.len() == target {
943                        return pairs;
944                    }
945                }
946            }
947        }
948    }
949
950    pairs
951}
952
953fn write_path_lengths_csv(path: &Path, graph: &DscdGraph, root_event_id: u64) -> Result<()> {
954    let mut events = graph.events.clone();
955    events.sort_by_key(|event| event.time_index);
956
957    let mut adjacency: HashMap<u64, Vec<u64>> = HashMap::new();
958    for edge in &graph.edges {
959        adjacency.entry(edge.src).or_default().push(edge.dst);
960    }
961
962    let mut distances: HashMap<u64, Option<usize>> =
963        events.iter().map(|event| (event.id, None)).collect();
964    if distances.contains_key(&root_event_id) {
965        distances.insert(root_event_id, Some(0));
966    }
967
968    for event in &events {
969        let Some(Some(dist)) = distances.get(&event.id).copied() else {
970            continue;
971        };
972
973        if let Some(neighbors) = adjacency.get(&event.id) {
974            for next in neighbors {
975                let candidate = dist + 1;
976                let current = distances.get(next).copied().flatten();
977                if current.map_or(true, |value| candidate > value) {
978                    distances.insert(*next, Some(candidate));
979                }
980            }
981        }
982    }
983
984    let mut writer = Writer::from_path(path)?;
985    writer.write_record(["event_id", "longest_path_length"])?;
986    for event in &events {
987        writer.write_record([
988            event.id.to_string(),
989            distances
990                .get(&event.id)
991                .copied()
992                .flatten()
993                .unwrap_or(0)
994                .to_string(),
995        ])?;
996    }
997    writer.flush()?;
998    Ok(())
999}
1000
1001fn write_edge_provenance_csv(
1002    path: &Path,
1003    graph: &DscdGraph,
1004    tau_star: f64,
1005    limit: usize,
1006) -> Result<()> {
1007    let event_by_id: HashMap<u64, &DscdEvent> =
1008        graph.events.iter().map(|event| (event.id, event)).collect();
1009    let mut writer = Writer::from_path(path)?;
1010    writer.write_record([
1011        "edge_id",
1012        "source_event",
1013        "target_event",
1014        "tau_star",
1015        "base_trust",
1016        "effective_trust",
1017        "dt",
1018        "d_struct",
1019        "structural_level_source",
1020        "structural_level_target",
1021        "module_id",
1022        "rewrite_rule_id",
1023        "residual_state_source",
1024        "residual_state_target",
1025    ])?;
1026
1027    for edge in graph.edges.iter().take(limit) {
1028        let Some(src) = event_by_id.get(&edge.src) else {
1029            continue;
1030        };
1031        let Some(dst) = event_by_id.get(&edge.dst) else {
1032            continue;
1033        };
1034        writer.write_record([
1035            edge.edge_id.to_string(),
1036            edge.src.to_string(),
1037            edge.dst.to_string(),
1038            tau_star.to_string(),
1039            edge.base_trust.to_string(),
1040            edge.effective_trust.to_string(),
1041            edge.temporal_lag.to_string(),
1042            edge.structural_distance.to_string(),
1043            edge.structural_level_source.to_string(),
1044            edge.structural_level_target.to_string(),
1045            edge.module_id.to_string(),
1046            edge.rewrite_rule_id.clone().unwrap_or_default(),
1047            src.residual_state.clone(),
1048            dst.residual_state.clone(),
1049        ])?;
1050    }
1051    writer.flush()?;
1052    Ok(())
1053}
1054
1055/// Create a timestamped DSCD run directory under an output root.
1056pub fn create_dscd_run_dir(output_root: &Path) -> Result<PathBuf> {
1057    fs::create_dir_all(output_root).with_context(|| {
1058        format!(
1059            "failed to create DSCD output root {}",
1060            output_root.display()
1061        )
1062    })?;
1063
1064    let stamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();
1065
1066    let mut run_dir = output_root.join(&stamp);
1067    let mut suffix = 1_usize;
1068    while run_dir.exists() {
1069        run_dir = output_root.join(format!("{stamp}_{suffix:02}"));
1070        suffix += 1;
1071    }
1072
1073    fs::create_dir_all(&run_dir)
1074        .with_context(|| format!("failed to create {}", run_dir.display()))?;
1075    Ok(run_dir)
1076}
1077
1078#[cfg(test)]
1079mod tests {
1080    use super::*;
1081    use std::time::{SystemTime, UNIX_EPOCH};
1082
1083    fn toy_events() -> Vec<DscdEvent> {
1084        (0..8_u64)
1085            .map(|idx| DscdEvent {
1086                id: idx,
1087                time_index: idx,
1088                module_id: (idx % 2) as u32,
1089                residual_state: "M".to_string(),
1090                trust: 0.9 - 0.05 * idx as f64,
1091                echo_slope: idx as f64 * 0.1,
1092                entropy_density: idx as f64 * 0.1,
1093                structural_level: idx as f64 * 0.05,
1094            })
1095            .collect()
1096    }
1097
1098    fn toy_candidates(events: &[DscdEvent], params: &DscdParams) -> Vec<DscdEdge> {
1099        let aux: Vec<EventAux> = events
1100            .iter()
1101            .map(|event| {
1102                let mut observer_trust = HashMap::new();
1103                observer_trust.insert(event.module_id, event.trust);
1104                EventAux {
1105                    event: event.clone(),
1106                    rewrite_rule_id: "stable_envelope".to_string(),
1107                    observer_trust,
1108                }
1109            })
1110            .collect();
1111        build_candidate_edges(&aux, params)
1112    }
1113
1114    #[test]
1115    fn trust_gate_respects_tau_and_time_order() {
1116        let events = vec![
1117            DscdEvent {
1118                id: 0,
1119                time_index: 0,
1120                module_id: 0,
1121                residual_state: "L".to_string(),
1122                trust: 0.9,
1123                echo_slope: 0.1,
1124                entropy_density: 0.2,
1125                structural_level: 0.1,
1126            },
1127            DscdEvent {
1128                id: 1,
1129                time_index: 1,
1130                module_id: 0,
1131                residual_state: "M".to_string(),
1132                trust: 0.7,
1133                echo_slope: 0.2,
1134                entropy_density: 0.3,
1135                structural_level: 0.2,
1136            },
1137        ];
1138        let edges = vec![
1139            DscdEdge {
1140                edge_id: 0,
1141                src: 0,
1142                dst: 1,
1143                module_id: 0,
1144                rewrite_rule_id: Some("rule_a".to_string()),
1145                base_trust: 0.8,
1146                effective_trust: 0.8,
1147                trust_at_creation: 0.8,
1148                temporal_lag: 1,
1149                structural_distance: 0.1,
1150                structural_level_source: 0.1,
1151                structural_level_target: 0.2,
1152            },
1153            DscdEdge {
1154                edge_id: 1,
1155                src: 1,
1156                dst: 0,
1157                module_id: 0,
1158                rewrite_rule_id: Some("rule_b".to_string()),
1159                base_trust: 0.9,
1160                effective_trust: 0.9,
1161                trust_at_creation: 0.9,
1162                temporal_lag: 1,
1163                structural_distance: 0.1,
1164                structural_level_source: 0.2,
1165                structural_level_target: 0.1,
1166            },
1167        ];
1168
1169        let graph = build_graph_for_tau(&events, &edges, 0.75);
1170        assert_eq!(graph.edges.len(), 1);
1171        assert_eq!(graph.edges[0].src, 0);
1172        assert_eq!(graph.edges[0].dst, 1);
1173    }
1174
1175    #[test]
1176    fn local_rule_enforces_lag_radius_and_outdegree() {
1177        let params = DscdParams {
1178            num_events: 128,
1179            tau_min: 0.0,
1180            tau_max: 1.0,
1181            tau_steps: 8,
1182            max_temporal_lag: 2,
1183            max_structural_radius: 0.08,
1184            temporal_decay_gamma: 0.0,
1185            structural_decay_beta: 2.0,
1186            max_out_edges: 2,
1187        };
1188        let events = toy_events();
1189        let candidates = toy_candidates(&events, &params);
1190
1191        let mut out_degree = HashMap::<u64, usize>::new();
1192        for edge in &candidates {
1193            let src_event = events
1194                .iter()
1195                .find(|event| event.id == edge.src)
1196                .expect("src");
1197            let dst_event = events
1198                .iter()
1199                .find(|event| event.id == edge.dst)
1200                .expect("dst");
1201
1202            let lag = dst_event.time_index - src_event.time_index;
1203            assert!(lag as usize <= params.max_temporal_lag);
1204
1205            let d_struct = structural_distance(src_event, dst_event);
1206            assert!(d_struct <= params.max_structural_radius + 1.0e-12);
1207
1208            *out_degree.entry(edge.src).or_insert(0) += 1;
1209        }
1210
1211        assert!(out_degree
1212            .values()
1213            .all(|degree| *degree <= params.max_out_edges));
1214    }
1215
1216    #[test]
1217    fn threshold_monotonicity_holds() {
1218        let params = DscdParams {
1219            num_events: 128,
1220            tau_min: 0.0,
1221            tau_max: 1.0,
1222            tau_steps: 8,
1223            max_temporal_lag: 4,
1224            max_structural_radius: 1.0,
1225            temporal_decay_gamma: 0.01,
1226            structural_decay_beta: 2.0,
1227            max_out_edges: 3,
1228        };
1229        let events = toy_events();
1230        let candidates = toy_candidates(&events, &params);
1231
1232        let tau_low = 0.2;
1233        let tau_high = 0.6;
1234        let graph_low = build_graph_for_tau(&events, &candidates, tau_low);
1235        let graph_high = build_graph_for_tau(&events, &candidates, tau_high);
1236
1237        let low_set: HashSet<(u64, u64)> = graph_low
1238            .edges
1239            .iter()
1240            .map(|edge| (edge.src, edge.dst))
1241            .collect();
1242        for edge in &graph_high.edges {
1243            assert!(low_set.contains(&(edge.src, edge.dst)));
1244        }
1245    }
1246
1247    #[test]
1248    fn tiny_scaling_run_writes_required_outputs() {
1249        let out_dir = std::env::temp_dir().join(format!(
1250            "dsfb-dscd-sim-test-{}",
1251            SystemTime::now()
1252                .duration_since(UNIX_EPOCH)
1253                .expect("clock")
1254                .as_nanos()
1255        ));
1256
1257        let cfg = DscdConfig {
1258            params: DscdParams {
1259                num_events: 128,
1260                tau_min: 0.0,
1261                tau_max: 1.0,
1262                tau_steps: 3,
1263                max_temporal_lag: 8,
1264                max_structural_radius: 1.0,
1265                temporal_decay_gamma: 0.0,
1266                structural_decay_beta: 1.0,
1267                max_out_edges: 4,
1268            },
1269            root_event_id: 0,
1270            output_dir: out_dir.clone(),
1271            scaling_ns: vec![64, 128],
1272        };
1273        run_dscd_simulation(&cfg).expect("simulation should run");
1274
1275        assert!(out_dir.join("threshold_scaling_summary.csv").exists());
1276        assert!(out_dir.join("finite_size_scaling.csv").exists());
1277        assert!(out_dir.join("threshold_curve_N_64.csv").exists());
1278        assert!(out_dir.join("threshold_curve_N_128.csv").exists());
1279        assert!(out_dir.join("graph_events.csv").exists());
1280        assert!(out_dir.join("graph_edges.csv").exists());
1281        assert!(out_dir.join("edge_provenance_0.csv").exists());
1282
1283        let mut reader = csv::Reader::from_path(out_dir.join("threshold_scaling_summary.csv"))
1284            .expect("summary csv");
1285        for row in reader.records() {
1286            let row = row.expect("csv row");
1287            let tau_star: f64 = row.get(1).expect("tau_star").parse().expect("parse tau");
1288            let width: f64 = row.get(2).expect("width").parse().expect("parse width");
1289            assert!((0.0..=1.0).contains(&tau_star));
1290            assert!(width >= 0.0);
1291        }
1292
1293        let _ = fs::remove_dir_all(out_dir);
1294    }
1295}