// openentropy_core/session.rs

//! Session recording for entropy collection research.
//!
//! Records timestamped entropy samples from one or more sources, storing raw
//! bytes, CSV metrics, and session metadata. Designed for offline analysis of
//! how entropy sources behave under different conditions.
//!
//! # Storage Format
//!
//! Each session is a directory containing:
//! - `session.json` — metadata (sources, timing, machine info, tags)
//! - `samples.csv` — per-sample metrics (raw + conditioned entropy stats)
//! - `raw.bin` — concatenated raw bytes
//! - `raw_index.csv` — byte offset index into `raw.bin`
//! - `conditioned.bin` — concatenated conditioned bytes
//! - `conditioned_index.csv` — byte offset index into `conditioned.bin`
16
17use std::collections::{HashMap, VecDeque};
18use std::fs::{self, File};
19use std::io::{BufWriter, Write};
20use std::path::{Path, PathBuf};
21use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
22
23use serde::{Deserialize, Serialize};
24use uuid::Uuid;
25
26use crate::analysis;
27use crate::conditioning::{ConditioningMode, quick_min_entropy, quick_shannon};
28
29// ---------------------------------------------------------------------------
30// Machine info
31// ---------------------------------------------------------------------------
32
/// Machine information captured at session start.
///
/// Populated by [`detect_machine_info`] and embedded in `session.json` so a
/// recording can later be attributed to the hardware/OS it was taken on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MachineInfo {
    /// Operating system name (`std::env::consts::OS`) followed by a
    /// best-effort version string.
    pub os: String,
    /// CPU architecture as reported by `std::env::consts::ARCH`.
    pub arch: String,
    /// CPU/chip model name, or `"unknown"` when it cannot be detected.
    pub chip: String,
    /// Available parallelism reported by the standard library (falls back to 1).
    pub cores: usize,
}
41
42/// Detect machine information (best-effort).
43pub fn detect_machine_info() -> MachineInfo {
44    let os = format!(
45        "{} {}",
46        std::env::consts::OS,
47        os_version().unwrap_or_default()
48    );
49    let arch = std::env::consts::ARCH.to_string();
50    let chip = detect_chip().unwrap_or_else(|| "unknown".to_string());
51    let cores = std::thread::available_parallelism()
52        .map(std::num::NonZero::get)
53        .unwrap_or(1);
54
55    MachineInfo {
56        os,
57        arch,
58        chip,
59        cores,
60    }
61}
62
/// Get OS version string (best-effort).
///
/// - macOS: output of `sw_vers -productVersion`.
/// - Linux: the `PRETTY_NAME` value from `/etc/os-release`.
/// - Other platforms: always `None`.
///
/// Returns `None` whenever the probe fails or yields an empty string, so a
/// `Some` value is always non-empty.
fn os_version() -> Option<String> {
    #[cfg(target_os = "macos")]
    {
        let output = std::process::Command::new("sw_vers")
            .arg("-productVersion")
            .output()
            .ok()?;
        // A failing command may still print to stdout; don't trust it.
        if !output.status.success() {
            return None;
        }
        let version = String::from_utf8_lossy(&output.stdout).trim().to_string();
        if version.is_empty() {
            None
        } else {
            Some(version)
        }
    }
    #[cfg(target_os = "linux")]
    {
        let release = std::fs::read_to_string("/etc/os-release").ok()?;
        release
            .lines()
            // `strip_prefix` removes the key exactly once, unlike
            // `trim_start_matches`, which would strip repeated occurrences.
            .find_map(|line| line.strip_prefix("PRETTY_NAME="))
            // os-release values may be wrapped in double or single quotes.
            .map(|value| {
                value
                    .trim()
                    .trim_matches(|c| c == '"' || c == '\'')
                    .to_string()
            })
            .filter(|name| !name.is_empty())
    }
    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
    {
        None
    }
}
90
/// Detect chip/CPU name (best-effort).
///
/// - macOS: `sysctl -n machdep.cpu.brand_string`.
/// - Linux: the first `model name` entry in `/proc/cpuinfo`.
/// - Other platforms: always `None`.
///
/// Both branches return `None` instead of an empty name, so callers can rely
/// on `Some` meaning "a usable name was found".
fn detect_chip() -> Option<String> {
    #[cfg(target_os = "macos")]
    {
        let output = std::process::Command::new("sysctl")
            .arg("-n")
            .arg("machdep.cpu.brand_string")
            .output()
            .ok()?;
        let name = String::from_utf8_lossy(&output.stdout).trim().to_string();
        if name.is_empty() { None } else { Some(name) }
    }
    #[cfg(target_os = "linux")]
    {
        let cpuinfo = std::fs::read_to_string("/proc/cpuinfo").ok()?;
        cpuinfo
            .lines()
            .find(|line| line.starts_with("model name"))
            // Keep everything after the first ':' so a value that itself
            // contains a colon is not truncated.
            .and_then(|line| line.split_once(':'))
            .map(|(_, value)| value.trim().to_string())
            // Match the macOS branch: an empty name means "not detected".
            .filter(|name| !name.is_empty())
    }
    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
    {
        None
    }
}
116
117// ---------------------------------------------------------------------------
118// Per-source analysis summary (embedded in session.json)
119// ---------------------------------------------------------------------------
120
121/// Compact analysis summary for a single source, embedded in session metadata.
122#[derive(Debug, Clone, Serialize, Deserialize)]
123pub struct SessionSourceAnalysis {
124    pub autocorrelation_max: f64,
125    pub autocorrelation_violations: usize,
126    pub spectral_flatness: f64,
127    pub spectral_dominant_freq: f64,
128    pub bit_bias_max: f64,
129    pub bit_bias_has_significant: bool,
130    pub distribution_ks_p: f64,
131    pub distribution_mean: f64,
132    pub distribution_std: f64,
133    pub stationarity_f_stat: f64,
134    pub stationarity_is_stationary: bool,
135    pub runs_longest: usize,
136    pub runs_total: usize,
137}
138
139impl SessionSourceAnalysis {
140    /// Build a compact summary from a full `SourceAnalysis`.
141    fn from_full(sa: &analysis::SourceAnalysis) -> Self {
142        Self {
143            autocorrelation_max: sa.autocorrelation.max_abs_correlation,
144            autocorrelation_violations: sa.autocorrelation.violations,
145            spectral_flatness: sa.spectral.flatness,
146            spectral_dominant_freq: sa.spectral.dominant_frequency,
147            bit_bias_max: sa.bit_bias.overall_bias,
148            bit_bias_has_significant: sa.bit_bias.has_significant_bias,
149            distribution_ks_p: sa.distribution.ks_p_value,
150            distribution_mean: sa.distribution.mean,
151            distribution_std: sa.distribution.std_dev,
152            stationarity_f_stat: sa.stationarity.f_statistic,
153            stationarity_is_stationary: sa.stationarity.is_stationary,
154            runs_longest: sa.runs.longest_run,
155            runs_total: sa.runs.total_runs,
156        }
157    }
158}
159
160// ---------------------------------------------------------------------------
161// Analysis buffer (retains last N bytes per source for end-of-session analysis)
162// ---------------------------------------------------------------------------
163
164/// Circular buffer that retains the last `capacity` bytes per source.
165struct AnalysisBuffer {
166    data: HashMap<String, VecDeque<u8>>,
167    capacity: usize,
168}
169
170impl AnalysisBuffer {
171    fn new(sources: &[String], capacity: usize) -> Self {
172        let data = sources
173            .iter()
174            .map(|s| (s.clone(), VecDeque::with_capacity(capacity)))
175            .collect();
176        Self { data, capacity }
177    }
178
179    fn push(&mut self, source: &str, bytes: &[u8]) {
180        if self.capacity == 0 || bytes.is_empty() {
181            return;
182        }
183
184        let buf = self
185            .data
186            .entry(source.to_string())
187            .or_insert_with(|| VecDeque::with_capacity(self.capacity));
188
189        if bytes.len() >= self.capacity {
190            buf.clear();
191            buf.extend(bytes[bytes.len() - self.capacity..].iter().copied());
192            return;
193        }
194
195        let overflow = buf.len() + bytes.len();
196        if overflow > self.capacity {
197            let to_drop = overflow - self.capacity;
198            for _ in 0..to_drop {
199                let _ = buf.pop_front();
200            }
201        }
202
203        buf.extend(bytes.iter().copied());
204    }
205
206    /// Run analysis on each source buffer and return the summary map.
207    fn analyze(&self) -> HashMap<String, SessionSourceAnalysis> {
208        self.data
209            .iter()
210            .filter(|(_, buf)| buf.len() >= 100) // Need minimum data for meaningful analysis
211            .map(|(name, buf)| {
212                let contiguous: Vec<u8> = buf.iter().copied().collect();
213                let full = analysis::full_analysis(name, &contiguous);
214                (name.clone(), SessionSourceAnalysis::from_full(&full))
215            })
216            .collect()
217    }
218}
219
220// ---------------------------------------------------------------------------
221// Session metadata (session.json)
222// ---------------------------------------------------------------------------
223
/// Session metadata written to session.json at the end of recording.
///
/// Built by `SessionWriter::build_meta` from the writer's state and the
/// `SessionConfig` it was created with.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionMeta {
    /// Metadata format version (the writer in this module emits `2`).
    pub version: u32,
    /// Unique session identifier (a UUID v4 rendered as a string).
    pub id: String,
    /// Wall-clock start time, UTC, formatted `YYYY-MM-DDTHH:MM:SSZ`.
    pub started_at: String,
    /// Wall-clock time at which the metadata was built, same format as `started_at`.
    pub ended_at: String,
    /// Elapsed recording time in milliseconds, measured with a monotonic clock.
    pub duration_ms: u64,
    /// Source names taken from the session configuration.
    pub sources: Vec<String>,
    /// Conditioning mode, as rendered by `ConditioningMode`'s `to_string()`.
    pub conditioning: String,
    /// Configured sampling interval in milliseconds, if one was set.
    pub interval_ms: Option<u64>,
    /// Number of samples written across all sources.
    pub total_samples: u64,
    /// Number of samples written per source name.
    pub samples_per_source: HashMap<String, u64>,
    /// Machine the session was recorded on.
    pub machine: MachineInfo,
    /// Free-form key/value tags copied from the session configuration.
    pub tags: HashMap<String, String>,
    /// Optional free-form note copied from the session configuration.
    pub note: Option<String>,
    /// Value of `crate::VERSION` at recording time.
    pub openentropy_version: String,
    /// Per-source analysis summaries; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub analysis: Option<HashMap<String, SessionSourceAnalysis>>,
}
244
245// ---------------------------------------------------------------------------
246// Session config
247// ---------------------------------------------------------------------------
248
249/// Configuration for a recording session.
250#[derive(Debug, Clone)]
251pub struct SessionConfig {
252    pub sources: Vec<String>,
253    pub conditioning: ConditioningMode,
254    pub interval: Option<Duration>,
255    pub output_dir: PathBuf,
256    pub tags: HashMap<String, String>,
257    pub note: Option<String>,
258    pub duration: Option<Duration>,
259    pub sample_size: usize,
260    pub include_analysis: bool,
261}
262
263impl Default for SessionConfig {
264    fn default() -> Self {
265        Self {
266            sources: Vec::new(),
267            conditioning: ConditioningMode::Raw,
268            interval: None,
269            output_dir: PathBuf::from("sessions"),
270            tags: HashMap::new(),
271            note: None,
272            duration: None,
273            sample_size: 1000,
274            include_analysis: false,
275        }
276    }
277}
278
279// ---------------------------------------------------------------------------
280// Session writer
281// ---------------------------------------------------------------------------
282
/// Number of samples between periodic flushes. Balances crash-safety
/// (data written to disk) against performance (fewer syscalls).
///
/// `SessionWriter::write_sample` flushes all writers whenever the running
/// sample count is a multiple of this value.
const FLUSH_INTERVAL: u64 = 64;
286
287/// Handles incremental file I/O for a recording session.
288///
289/// Implements `Drop` to flush buffers and write a best-effort session.json
290/// if `finish()` was never called (e.g., due to a panic or early exit).
291pub struct SessionWriter {
292    session_dir: PathBuf,
293    csv_writer: BufWriter<File>,
294    raw_writer: BufWriter<File>,
295    conditioned_writer: BufWriter<File>,
296    index_writer: BufWriter<File>,
297    conditioned_index_writer: BufWriter<File>,
298    raw_offset: u64,
299    conditioned_offset: u64,
300    total_samples: u64,
301    samples_per_source: HashMap<String, u64>,
302    started_at: SystemTime,
303    started_instant: Instant,
304    session_id: String,
305    config: SessionConfig,
306    machine: MachineInfo,
307    /// Retains last 128 KiB per source for optional end-of-session analysis.
308    analysis_buffer: Option<AnalysisBuffer>,
309    /// Set to true after `finish()` succeeds so `Drop` doesn't double-write.
310    finished: bool,
311}
312
313impl SessionWriter {
314    /// Create a new session writer, creating the session directory and files.
315    ///
316    /// # Errors
317    ///
318    /// Returns an error if the session directory or any output files cannot be created.
319    pub fn new(config: SessionConfig) -> std::io::Result<Self> {
320        let machine = detect_machine_info();
321        let session_id = Uuid::new_v4().to_string();
322        let started_at = SystemTime::now();
323
324        // Build directory name: bounded and filesystem-safe to avoid ENAMETOOLONG
325        // when many sources are recorded.
326        let ts = started_at.duration_since(UNIX_EPOCH).unwrap_or_default();
327        let dt = format_iso8601_compact(ts);
328        let dir_name = build_session_dir_name(&dt, &config.sources, &session_id);
329
330        let session_dir = config.output_dir.join(&dir_name);
331        fs::create_dir_all(&session_dir)?;
332
333        // Create samples.csv with header
334        let csv_file = File::create(session_dir.join("samples.csv"))?;
335        let mut csv_writer = BufWriter::new(csv_file);
336        writeln!(
337            csv_writer,
338            "timestamp_ns,source,raw_hex,conditioned_hex,raw_shannon,raw_min_entropy,conditioned_shannon,conditioned_min_entropy"
339        )?;
340        csv_writer.flush()?;
341
342        // Create raw.bin
343        let raw_file = File::create(session_dir.join("raw.bin"))?;
344        let raw_writer = BufWriter::new(raw_file);
345
346        // Create conditioned.bin
347        let conditioned_file = File::create(session_dir.join("conditioned.bin"))?;
348        let conditioned_writer = BufWriter::new(conditioned_file);
349
350        // Create raw_index.csv with header
351        let index_file = File::create(session_dir.join("raw_index.csv"))?;
352        let mut index_writer = BufWriter::new(index_file);
353        writeln!(index_writer, "offset,length,timestamp_ns,source")?;
354        index_writer.flush()?;
355
356        // Create conditioned_index.csv with header
357        let conditioned_index_file = File::create(session_dir.join("conditioned_index.csv"))?;
358        let mut conditioned_index_writer = BufWriter::new(conditioned_index_file);
359        writeln!(
360            conditioned_index_writer,
361            "offset,length,timestamp_ns,source"
362        )?;
363        conditioned_index_writer.flush()?;
364
365        let samples_per_source: HashMap<String, u64> =
366            config.sources.iter().map(|s| (s.clone(), 0)).collect();
367        let analysis_buffer = if config.include_analysis {
368            Some(AnalysisBuffer::new(&config.sources, 128 * 1024))
369        } else {
370            None
371        };
372
373        Ok(Self {
374            session_dir,
375            csv_writer,
376            raw_writer,
377            conditioned_writer,
378            index_writer,
379            conditioned_index_writer,
380            raw_offset: 0,
381            conditioned_offset: 0,
382            total_samples: 0,
383            samples_per_source,
384            started_at,
385            started_instant: Instant::now(),
386            session_id,
387            config,
388            machine,
389            analysis_buffer,
390            finished: false,
391        })
392    }
393
394    /// Record a single sample from a source.
395    ///
396    /// Buffers are flushed periodically (every [`FLUSH_INTERVAL`] samples)
397    /// rather than on every call, for performance. Data is still safe against
398    /// process crashes because `Drop` flushes and writes session.json.
399    ///
400    /// # Errors
401    ///
402    /// Returns an error if writing to any of the output files fails.
403    pub fn write_sample(
404        &mut self,
405        source: &str,
406        raw_bytes: &[u8],
407        conditioned_bytes: &[u8],
408    ) -> std::io::Result<()> {
409        if raw_bytes.is_empty() {
410            return Ok(());
411        }
412
413        #[allow(clippy::cast_possible_truncation)] // ns won't overflow u64 until ~2554
414        let timestamp_ns = SystemTime::now()
415            .duration_since(UNIX_EPOCH)
416            .unwrap_or_default()
417            .as_nanos() as u64;
418
419        let raw_shannon = quick_shannon(raw_bytes);
420        // Clamp to 0.0 to avoid displaying "-0.00" in CSV
421        let raw_min_entropy = quick_min_entropy(raw_bytes).max(0.0);
422        let conditioned_shannon = quick_shannon(conditioned_bytes);
423        let conditioned_min_entropy = quick_min_entropy(conditioned_bytes).max(0.0);
424        let raw_hex = hex_encode(raw_bytes);
425        let conditioned_hex = hex_encode(conditioned_bytes);
426
427        // Write CSV row
428        writeln!(
429            self.csv_writer,
430            "{timestamp_ns},{source},{raw_hex},{conditioned_hex},{raw_shannon:.2},{raw_min_entropy:.2},{conditioned_shannon:.2},{conditioned_min_entropy:.2}",
431        )?;
432
433        // Write raw bytes
434        self.raw_writer.write_all(raw_bytes)?;
435        self.conditioned_writer.write_all(conditioned_bytes)?;
436
437        // Write index row
438        writeln!(
439            self.index_writer,
440            "{},{},{timestamp_ns},{source}",
441            self.raw_offset,
442            raw_bytes.len(),
443        )?;
444        writeln!(
445            self.conditioned_index_writer,
446            "{},{},{timestamp_ns},{source}",
447            self.conditioned_offset,
448            conditioned_bytes.len(),
449        )?;
450
451        self.raw_offset += raw_bytes.len() as u64;
452        self.conditioned_offset += conditioned_bytes.len() as u64;
453        self.total_samples += 1;
454        if let Some(buffer) = &mut self.analysis_buffer {
455            buffer.push(source, raw_bytes);
456        }
457        *self
458            .samples_per_source
459            .entry(source.to_string())
460            .or_insert(0) += 1;
461
462        // Periodic flush for crash-safety without per-sample syscall overhead
463        if self.total_samples.is_multiple_of(FLUSH_INTERVAL) {
464            self.flush_all()?;
465        }
466
467        Ok(())
468    }
469
470    /// Flush all buffered writers to disk.
471    fn flush_all(&mut self) -> std::io::Result<()> {
472        self.csv_writer.flush()?;
473        self.raw_writer.flush()?;
474        self.conditioned_writer.flush()?;
475        self.index_writer.flush()?;
476        self.conditioned_index_writer.flush()?;
477        Ok(())
478    }
479
480    /// Build the session metadata from current state.
481    #[allow(clippy::cast_possible_truncation)] // durations won't overflow u64 in practice
482    fn build_meta(&self) -> SessionMeta {
483        let ended_at = SystemTime::now();
484        let duration = self.started_instant.elapsed();
485
486        let analysis = self.analysis_buffer.as_ref().and_then(|buffer| {
487            let analysis_map = buffer.analyze();
488            if analysis_map.is_empty() {
489                None
490            } else {
491                Some(analysis_map)
492            }
493        });
494
495        SessionMeta {
496            version: 2,
497            id: self.session_id.clone(),
498            started_at: format_iso8601(
499                self.started_at
500                    .duration_since(UNIX_EPOCH)
501                    .unwrap_or_default(),
502            ),
503            ended_at: format_iso8601(ended_at.duration_since(UNIX_EPOCH).unwrap_or_default()),
504            duration_ms: duration.as_millis() as u64,
505            sources: self.config.sources.clone(),
506            conditioning: self.config.conditioning.to_string(),
507            interval_ms: self.config.interval.map(|d| d.as_millis() as u64),
508            total_samples: self.total_samples,
509            samples_per_source: self.samples_per_source.clone(),
510            machine: self.machine.clone(),
511            tags: self.config.tags.clone(),
512            note: self.config.note.clone(),
513            openentropy_version: crate::VERSION.to_string(),
514            analysis,
515        }
516    }
517
518    /// Write session.json to disk.
519    fn write_session_json(&self, meta: &SessionMeta) -> std::io::Result<()> {
520        let json = serde_json::to_string_pretty(meta).map_err(std::io::Error::other)?;
521        fs::write(self.session_dir.join("session.json"), json)
522    }
523
524    /// Finalize the session, writing session.json. Call this on graceful shutdown.
525    ///
526    /// # Errors
527    ///
528    /// Returns an error if flushing buffers or writing session.json fails.
529    pub fn finish(mut self) -> std::io::Result<PathBuf> {
530        self.flush_all()?;
531        let meta = self.build_meta();
532        self.write_session_json(&meta)?;
533        self.finished = true;
534        Ok(self.session_dir.clone())
535    }
536
537    /// Get the session directory path.
538    #[must_use]
539    pub fn session_dir(&self) -> &Path {
540        &self.session_dir
541    }
542
543    /// Get total samples recorded so far.
544    #[must_use]
545    pub fn total_samples(&self) -> u64 {
546        self.total_samples
547    }
548
549    /// Get elapsed time since recording started.
550    #[must_use]
551    pub fn elapsed(&self) -> Duration {
552        self.started_instant.elapsed()
553    }
554
555    /// Get per-source sample counts.
556    #[must_use]
557    pub fn samples_per_source(&self) -> &HashMap<String, u64> {
558        &self.samples_per_source
559    }
560}
561
562impl Drop for SessionWriter {
563    fn drop(&mut self) {
564        if self.finished {
565            return;
566        }
567        // Best-effort: flush buffers and write session.json so data isn't lost
568        // on panic/early-exit. Errors are silently ignored since we're in Drop.
569        let _ = self.flush_all();
570        let meta = self.build_meta();
571        let _ = self.write_session_json(&meta);
572    }
573}
574
575// ---------------------------------------------------------------------------
576// Helpers
577// ---------------------------------------------------------------------------
578
/// Render `bytes` as lowercase hexadecimal, two digits per byte, no separators.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
588
589/// Format a duration-since-epoch as a compact ISO-8601 timestamp for directory names.
590/// Example: `2026-02-15T013000Z`
591fn format_iso8601_compact(since_epoch: Duration) -> String {
592    let secs = since_epoch.as_secs();
593    let (year, month, day, hour, min, sec) = secs_to_utc(secs);
594    format!("{year:04}-{month:02}-{day:02}T{hour:02}{min:02}{sec:02}Z")
595}
596
597/// Format a duration-since-epoch as a full ISO-8601 timestamp.
598/// Example: `2026-02-15T01:30:00Z`
599fn format_iso8601(since_epoch: Duration) -> String {
600    let secs = since_epoch.as_secs();
601    let (year, month, day, hour, min, sec) = secs_to_utc(secs);
602    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{min:02}:{sec:02}Z")
603}
604
605/// Convert seconds since Unix epoch to (year, month, day, hour, minute, second) UTC.
606/// Simple implementation — no leap second handling.
607fn secs_to_utc(secs: u64) -> (u64, u64, u64, u64, u64, u64) {
608    let sec = secs % 60;
609    let min = (secs / 60) % 60;
610    let hour = (secs / 3600) % 24;
611
612    let mut days = secs / 86400;
613    let mut year = 1970u64;
614
615    loop {
616        let days_in_year = if is_leap(year) { 366 } else { 365 };
617        if days < days_in_year {
618            break;
619        }
620        days -= days_in_year;
621        year += 1;
622    }
623
624    let months_days: [u64; 12] = if is_leap(year) {
625        [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
626    } else {
627        [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
628    };
629
630    let mut month = 0u64;
631    for (i, &md) in months_days.iter().enumerate() {
632        if days < md {
633            month = i as u64 + 1;
634            break;
635        }
636        days -= md;
637    }
638    let day = days + 1;
639
640    (year, month, day, hour, min, sec)
641}
642
/// Gregorian leap-year rule: every 4th year, except centuries, except every
/// 400th year.
fn is_leap(year: u64) -> bool {
    if year.is_multiple_of(400) {
        true
    } else if year.is_multiple_of(100) {
        false
    } else {
        year.is_multiple_of(4)
    }
}
646
647/// Build a compact, filesystem-safe session directory name.
648///
649/// Format: `{timestamp}-{source-label}-{id8}`
650/// Examples:
651/// - `2026-02-17T193000Z-clock_jitter-a1b2c3d4`
652/// - `2026-02-17T193000Z-clock_jitter-plus34-a1b2c3d4`
653fn build_session_dir_name(timestamp: &str, sources: &[String], session_id: &str) -> String {
654    let first = sources.first().map(String::as_str).unwrap_or("unknown");
655    let first = sanitize_for_path(first);
656    let label = if sources.len() <= 1 {
657        truncate_for_path(&first, 48)
658    } else {
659        let base = truncate_for_path(&first, 36);
660        format!("{base}-plus{}", sources.len() - 1)
661    };
662    let id8 = session_id.chars().take(8).collect::<String>();
663    format!("{timestamp}-{label}-{id8}")
664}
665
/// Map every character that is not an ASCII letter, ASCII digit, `-` or `_`
/// to `_`, one output character per input character.
fn sanitize_for_path(s: &str) -> String {
    let mut cleaned = String::with_capacity(s.len());
    for ch in s.chars() {
        let path_safe = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        cleaned.push(if path_safe { ch } else { '_' });
    }
    cleaned
}
678
/// Keep at most the first `max_chars` characters of `s` (counted as `char`s,
/// so a multi-byte character is never split).
fn truncate_for_path(s: &str, max_chars: usize) -> String {
    match s.char_indices().nth(max_chars) {
        // `cut` is the byte offset of the first character to discard.
        Some((cut, _)) => s[..cut].to_string(),
        None => s.to_string(),
    }
}
683
684// ---------------------------------------------------------------------------
685// Tests
686// ---------------------------------------------------------------------------
687
688#[cfg(test)]
689mod tests {
690    use super::*;
691
692    // -----------------------------------------------------------------------
693    // Machine info tests
694    // -----------------------------------------------------------------------
695
696    #[test]
697    fn test_detect_machine_info() {
698        let info = detect_machine_info();
699        assert!(!info.os.is_empty());
700        assert!(!info.arch.is_empty());
701        assert!(info.cores > 0);
702    }
703
704    // -----------------------------------------------------------------------
705    // ISO-8601 formatting tests
706    // -----------------------------------------------------------------------
707
708    #[test]
709    fn test_format_iso8601_epoch() {
710        let s = format_iso8601(Duration::from_secs(0));
711        assert_eq!(s, "1970-01-01T00:00:00Z");
712    }
713
714    #[test]
715    fn test_format_iso8601_compact_epoch() {
716        let s = format_iso8601_compact(Duration::from_secs(0));
717        assert_eq!(s, "1970-01-01T000000Z");
718    }
719
720    #[test]
721    fn test_format_iso8601_known_date() {
722        // 2026-02-15 01:30:00 UTC = 1771030200 seconds since epoch
723        let s = format_iso8601(Duration::from_secs(1771030200));
724        assert!(s.starts_with("2026-"));
725    }
726
727    // -----------------------------------------------------------------------
728    // Hex encode tests
729    // -----------------------------------------------------------------------
730
731    #[test]
732    fn test_hex_encode_empty() {
733        assert_eq!(hex_encode(&[]), "");
734    }
735
736    #[test]
737    fn test_hex_encode_basic() {
738        assert_eq!(hex_encode(&[0xab, 0xcd, 0x01]), "abcd01");
739    }
740
741    // -----------------------------------------------------------------------
742    // SessionWriter tests
743    // -----------------------------------------------------------------------
744
745    #[test]
746    fn test_session_writer_creates_directory_and_files() {
747        let tmp = tempfile::tempdir().unwrap();
748        let config = SessionConfig {
749            sources: vec!["test_source".to_string()],
750            output_dir: tmp.path().to_path_buf(),
751            ..Default::default()
752        };
753
754        let writer = SessionWriter::new(config).unwrap();
755        let dir = writer.session_dir().to_path_buf();
756
757        assert!(dir.exists());
758        assert!(dir.join("samples.csv").exists());
759        assert!(dir.join("raw.bin").exists());
760        assert!(dir.join("raw_index.csv").exists());
761        assert!(dir.join("conditioned.bin").exists());
762        assert!(dir.join("conditioned_index.csv").exists());
763
764        // Finish and verify session.json
765        let result_dir = writer.finish().unwrap();
766        assert!(result_dir.join("session.json").exists());
767    }
768
769    #[test]
770    fn test_build_session_dir_name_is_compact() {
771        let sources: Vec<String> = (0..40)
772            .map(|i| format!("very_long_source_name_number_{i}_with_extra_detail"))
773            .collect();
774        let name = build_session_dir_name("2026-02-17T010203Z", &sources, "12345678-aaaa-bbbb");
775        assert!(name.len() < 128, "dir name too long: {} chars", name.len());
776        assert!(name.contains("plus39"));
777    }
778
779    #[test]
780    fn test_session_writer_with_many_sources_does_not_fail() {
781        let tmp = tempfile::tempdir().unwrap();
782        let sources: Vec<String> = (0..40)
783            .map(|i| format!("very_long_source_name_number_{i}_with_extra_detail"))
784            .collect();
785        let config = SessionConfig {
786            sources,
787            output_dir: tmp.path().to_path_buf(),
788            ..Default::default()
789        };
790        let writer = SessionWriter::new(config).expect("SessionWriter should handle many sources");
791        assert!(writer.session_dir().exists());
792    }
793
794    #[test]
795    fn test_session_writer_writes_valid_csv() {
796        let tmp = tempfile::tempdir().unwrap();
797        let config = SessionConfig {
798            sources: vec!["mock_source".to_string()],
799            output_dir: tmp.path().to_path_buf(),
800            ..Default::default()
801        };
802
803        let mut writer = SessionWriter::new(config).unwrap();
804        let data = vec![0xAA; 100];
805        writer.write_sample("mock_source", &data, &data).unwrap();
806        writer.write_sample("mock_source", &data, &data).unwrap();
807
808        let dir = writer.session_dir().to_path_buf();
809        let result_dir = writer.finish().unwrap();
810
811        // Check CSV
812        let csv = std::fs::read_to_string(dir.join("samples.csv")).unwrap();
813        let lines: Vec<&str> = csv.lines().collect();
814        assert_eq!(
815            lines[0],
816            "timestamp_ns,source,raw_hex,conditioned_hex,raw_shannon,raw_min_entropy,conditioned_shannon,conditioned_min_entropy"
817        );
818        assert_eq!(lines.len(), 3); // header + 2 samples
819        assert!(lines[1].contains("mock_source"));
820
821        // Check raw.bin size
822        let raw = std::fs::read(dir.join("raw.bin")).unwrap();
823        assert_eq!(raw.len(), 200); // 2 x 100 bytes
824
825        // Check raw_index.csv
826        let index = std::fs::read_to_string(dir.join("raw_index.csv")).unwrap();
827        let idx_lines: Vec<&str> = index.lines().collect();
828        assert_eq!(idx_lines.len(), 3); // header + 2 entries
829        assert!(idx_lines[1].starts_with("0,100,")); // first entry at offset 0
830        assert!(idx_lines[2].starts_with("100,100,")); // second at offset 100
831
832        // Check conditioned.bin/index
833        let conditioned = std::fs::read(dir.join("conditioned.bin")).unwrap();
834        assert_eq!(conditioned.len(), 200);
835        let conditioned_index = std::fs::read_to_string(dir.join("conditioned_index.csv")).unwrap();
836        let cidx_lines: Vec<&str> = conditioned_index.lines().collect();
837        assert_eq!(cidx_lines.len(), 3);
838        assert!(cidx_lines[1].starts_with("0,100,"));
839        assert!(cidx_lines[2].starts_with("100,100,"));
840
841        // Check session.json
842        let json_str = std::fs::read_to_string(result_dir.join("session.json")).unwrap();
843        let meta: SessionMeta = serde_json::from_str(&json_str).unwrap();
844        assert_eq!(meta.version, 2);
845        assert_eq!(meta.total_samples, 2);
846        assert_eq!(meta.sources, vec!["mock_source"]);
847        assert_eq!(*meta.samples_per_source.get("mock_source").unwrap(), 2);
848        assert_eq!(meta.conditioning, "raw");
849    }
850
/// Interleaved samples from two sources are counted per source and in total,
/// and the total is persisted to `session.json` by `finish()`.
#[test]
fn test_session_writer_multiple_sources() {
    let scratch = tempfile::tempdir().unwrap();
    let mut writer = SessionWriter::new(SessionConfig {
        sources: vec!["source_a".to_string(), "source_b".to_string()],
        output_dir: scratch.path().to_path_buf(),
        ..Default::default()
    })
    .unwrap();

    // Interleave the two sources (a, b, a) with different payload sizes.
    writer.write_sample("source_a", &[1; 50], &[4; 50]).unwrap();
    writer.write_sample("source_b", &[2; 75], &[5; 75]).unwrap();
    writer.write_sample("source_a", &[3; 50], &[6; 50]).unwrap();

    // In-memory counters: three samples overall, split 2/1 between sources.
    assert_eq!(writer.total_samples(), 3);
    assert_eq!(writer.samples_per_source()["source_a"], 2);
    assert_eq!(writer.samples_per_source()["source_b"], 1);

    // The persisted metadata must carry the same total.
    let session_dir = writer.finish().unwrap();
    let json = std::fs::read_to_string(session_dir.join("session.json")).unwrap();
    let meta: SessionMeta = serde_json::from_str(&json).unwrap();
    assert_eq!(meta.total_samples, 3);
}
875
/// Tags and the free-form note supplied in the config are written to
/// `session.json` unchanged, even when no samples were recorded.
#[test]
fn test_session_writer_with_tags_and_note() {
    let scratch = tempfile::tempdir().unwrap();
    let tags = HashMap::from([
        ("crystal".to_string(), "quartz".to_string()),
        ("distance".to_string(), "2cm".to_string()),
    ]);

    let writer = SessionWriter::new(SessionConfig {
        sources: vec!["test".to_string()],
        output_dir: scratch.path().to_path_buf(),
        tags,
        note: Some("Testing quartz crystal".to_string()),
        ..Default::default()
    })
    .unwrap();

    // Finish immediately: only the metadata is of interest here.
    let session_dir = writer.finish().unwrap();
    let json = std::fs::read_to_string(session_dir.join("session.json")).unwrap();
    let meta: SessionMeta = serde_json::from_str(&json).unwrap();

    assert_eq!(meta.tags["crystal"], "quartz");
    assert_eq!(meta.tags["distance"], "2cm");
    assert_eq!(meta.note.as_deref(), Some("Testing quartz crystal"));
}
901
/// A fully populated `SessionMeta` survives a serialize → deserialize cycle
/// through pretty-printed JSON with its key fields intact.
#[test]
fn test_session_meta_serialization_roundtrip() {
    let machine = MachineInfo {
        os: "macos 15.4".to_string(),
        arch: "aarch64".to_string(),
        chip: "Apple M4".to_string(),
        cores: 10,
    };
    let samples_per_source = HashMap::from([("clock_jitter".to_string(), 3000)]);

    let original = SessionMeta {
        version: 2,
        id: "test-id".to_string(),
        started_at: "2026-01-01T00:00:00Z".to_string(),
        ended_at: "2026-01-01T00:05:00Z".to_string(),
        duration_ms: 300000,
        sources: vec!["clock_jitter".to_string()],
        conditioning: "raw".to_string(),
        interval_ms: Some(100),
        total_samples: 3000,
        samples_per_source,
        machine,
        tags: HashMap::new(),
        note: None,
        openentropy_version: "0.5.0".to_string(),
        analysis: None,
    };

    // Serialize, then parse the text back into a fresh value.
    let encoded = serde_json::to_string_pretty(&original).unwrap();
    let decoded: SessionMeta = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.version, 2);
    assert_eq!(decoded.id, "test-id");
    assert_eq!(decoded.total_samples, 3000);
    assert_eq!(decoded.duration_ms, 300000);
}
938
939    // -----------------------------------------------------------------------
940    // Drop safety tests
941    // -----------------------------------------------------------------------
942
/// Letting a writer go out of scope without calling `finish()` is expected to
/// still leave a parseable `session.json` that counts the recorded sample.
#[test]
fn test_drop_writes_session_json_without_finish() {
    let scratch = tempfile::tempdir().unwrap();

    let session_dir = {
        let mut writer = SessionWriter::new(SessionConfig {
            sources: vec!["drop_test".to_string()],
            output_dir: scratch.path().to_path_buf(),
            ..Default::default()
        })
        .unwrap();
        let session_dir = writer.session_dir().to_path_buf();
        writer
            .write_sample("drop_test", &[42; 100], &[24; 100])
            .unwrap();
        session_dir
        // `writer` is dropped here, at the end of the scope, with no finish().
    };

    // session.json should still be written by Drop
    let meta_path = session_dir.join("session.json");
    assert!(meta_path.exists());

    let json = std::fs::read_to_string(&meta_path).unwrap();
    let meta: SessionMeta = serde_json::from_str(&json).unwrap();
    assert_eq!(meta.total_samples, 1);
}
967
/// Calling `finish()` consumes the writer; the subsequent drop must not panic
/// and `session.json` must be present afterwards.
#[test]
fn test_finish_prevents_double_write_on_drop() {
    let scratch = tempfile::tempdir().unwrap();
    let writer = SessionWriter::new(SessionConfig {
        sources: vec!["test".to_string()],
        output_dir: scratch.path().to_path_buf(),
        ..Default::default()
    })
    .unwrap();

    // Capture the directory first: finish() takes the writer by value.
    let session_dir = writer.session_dir().to_path_buf();
    let _ = writer.finish().unwrap();

    // session.json should exist (from finish), and Drop should not error
    let meta_path = session_dir.join("session.json");
    assert!(meta_path.exists());
}
984
985    // -----------------------------------------------------------------------
986    // Edge case tests
987    // -----------------------------------------------------------------------
988
/// Writing a sample with empty raw and conditioned payloads succeeds but is
/// not counted towards the session's sample total.
#[test]
fn test_write_sample_skips_empty_bytes() {
    let scratch = tempfile::tempdir().unwrap();
    let mut writer = SessionWriter::new(SessionConfig {
        sources: vec!["test".to_string()],
        output_dir: scratch.path().to_path_buf(),
        ..Default::default()
    })
    .unwrap();

    // The call itself must return Ok; only the counter shows it was skipped.
    let outcome = writer.write_sample("test", &[], &[]);
    outcome.unwrap();
    assert_eq!(writer.total_samples(), 0);

    let _ = writer.finish().unwrap();
}
1003
/// No data row of `samples.csv` may contain a negative-zero metric (`-0.00`).
#[test]
fn test_min_entropy_not_negative_in_csv() {
    let scratch = tempfile::tempdir().unwrap();
    let mut writer = SessionWriter::new(SessionConfig {
        sources: vec!["test".to_string()],
        output_dir: scratch.path().to_path_buf(),
        ..Default::default()
    })
    .unwrap();

    // All-same bytes produce near-zero min-entropy that could display as -0.00
    let constant_payload = [0xAA; 100];
    writer
        .write_sample("test", &constant_payload, &constant_payload)
        .unwrap();

    let samples_path = writer.session_dir().join("samples.csv");
    let _ = writer.finish().unwrap();

    // Skip the header row and look for the first data row with a "-0.00".
    let csv = std::fs::read_to_string(samples_path).unwrap();
    let offending = csv.lines().skip(1).find(|row| row.contains("-0.00"));
    if let Some(line) = offending {
        panic!("CSV should not contain negative zero: {line}");
    }
}
1029
1030    // -----------------------------------------------------------------------
1031    // UTC conversion tests
1032    // -----------------------------------------------------------------------
1033
/// Second 0 maps to the Unix epoch: 1970-01-01 00:00:00.
#[test]
fn test_secs_to_utc_epoch() {
    // Tuple layout: (year, month, day, hour, minute, second).
    let epoch = secs_to_utc(0);
    assert_eq!(epoch, (1970, 1, 1, 0, 0, 0));
}
1039
/// A well-known timestamp converts to the expected calendar date.
#[test]
fn test_secs_to_utc_known_date() {
    // 2000-01-01 00:00:00 UTC = 946684800
    let millennium = secs_to_utc(946684800);
    assert_eq!(millennium, (2000, 1, 1, 0, 0, 0));
}
1046
/// `is_leap` classifies leap and common years correctly, including both
/// century cases (2000 is a leap year, 1900 is not).
#[test]
fn test_is_leap() {
    let cases = [(2000, true), (2024, true), (1900, false), (2023, false)];
    for (year, expected) in cases {
        assert_eq!(is_leap(year), expected, "is_leap({year})");
    }
}
1054}