Skip to main content

shellhist_core/
lib.rs

//! `shellhist-core` — readers for shell command-history files.
//!
//! Parses the four common history formats into a uniform [`HistoryEntry`] stream:
//! bash (`.bash_history`), zsh (`.zsh_history`, including `EXTENDED_HISTORY`),
//! PowerShell PSReadLine (`ConsoleHost_history.txt`), and fish (`fish_history`).
//!
//! The input is attacker-controllable evidence: parsing is lenient (lossy UTF-8),
//! bounds-checked, and never panics. No `unsafe`. Findings live in the sibling
//! `shellhist-forensic` crate; this crate only decodes.
10
11#![forbid(unsafe_code)]
12
13pub mod bash;
14pub mod fish;
15pub mod powershell;
16pub mod zsh;
17
/// Identifies which shell wrote a history file.
///
/// Selects the decoder used by [`parse`] and tags every resulting
/// [`HistoryEntry`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Shell {
    /// bash (`.bash_history`).
    Bash,
    /// zsh (`.zsh_history`, including `EXTENDED_HISTORY`).
    Zsh,
    /// fish (`fish_history`).
    Fish,
    /// PowerShell PSReadLine (`ConsoleHost_history.txt`).
    PowerShell,
    /// The format was not recognised; the data is treated as plain
    /// one-command-per-line text.
    Unknown,
}
28
29/// One command-history entry, normalized across shells.
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct HistoryEntry {
32    /// The shell this entry was decoded from.
33    pub shell: Shell,
34    /// The command text (multi-line commands keep their embedded newlines).
35    pub command: String,
36    /// Start time as Unix epoch seconds, when the format records it (bash with
37    /// `HISTTIMEFORMAT`, zsh `EXTENDED_HISTORY`, fish). `None` for plain bash and
38    /// PowerShell PSReadLine, which store no timestamps.
39    pub timestamp: Option<i64>,
40    /// Wall-clock duration in seconds (zsh `EXTENDED_HISTORY` only).
41    pub elapsed: Option<i64>,
42    /// Filesystem paths fish heuristically associated with the command.
43    pub paths: Vec<String>,
44}
45
46impl HistoryEntry {
47    pub(crate) fn plain(shell: Shell, command: impl Into<String>) -> Self {
48        Self {
49            shell,
50            command: command.into(),
51            timestamp: None,
52            elapsed: None,
53            paths: Vec::new(),
54        }
55    }
56}
57
/// Returns `data` without a leading UTF-8 byte-order mark (`EF BB BF`).
///
/// Input that does not begin with the BOM is returned unchanged; only a BOM at
/// the very start is removed.
#[must_use]
pub fn strip_bom(data: &[u8]) -> &[u8] {
    const UTF8_BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];

    match data.strip_prefix(&UTF8_BOM) {
        Some(rest) => rest,
        None => data,
    }
}
63
64/// Detect the history format from the bytes and an optional filename hint.
65///
66/// Content sniffing wins over the filename (a renamed file is still parseable):
67/// a zsh `: <epoch>:<elapsed>;` line or a fish `- cmd:` record is unambiguous; a
68/// bash `#<epoch>` timestamp line marks timestamped bash. Otherwise the filename
69/// disambiguates PSReadLine vs plain bash; failing that, `Unknown` (plain lines).
70#[must_use]
71pub fn detect(data: &[u8], filename: Option<&str>) -> Shell {
72    let text = String::from_utf8_lossy(strip_bom(data));
73
74    for line in text.lines().take(200) {
75        if zsh::is_extended_line(line) {
76            return Shell::Zsh;
77        }
78        if line.starts_with("- cmd:") {
79            return Shell::Fish;
80        }
81        if bash::parse_timestamp_line(line).is_some() {
82            return Shell::Bash;
83        }
84    }
85
86    if let Some(name) = filename {
87        let lower = name.to_ascii_lowercase();
88        if lower.contains("zsh_history") {
89            return Shell::Zsh;
90        }
91        if lower.contains("fish_history") {
92            return Shell::Fish;
93        }
94        if lower.contains("bash_history") {
95            return Shell::Bash;
96        }
97        if lower.contains("consolehost_history") || lower.contains("psreadline") {
98            return Shell::PowerShell;
99        }
100    }
101
102    Shell::Unknown
103}
104
105/// Parse history bytes as the given shell.
106#[must_use]
107pub fn parse(data: &[u8], shell: Shell) -> Vec<HistoryEntry> {
108    match shell {
109        Shell::Bash | Shell::Unknown => bash::parse(data),
110        Shell::Zsh => zsh::parse(data),
111        Shell::Fish => fish::parse(data),
112        Shell::PowerShell => powershell::parse(data),
113    }
114}
115
116/// Detect the format, then parse. The zero-knowledge entry point.
117#[must_use]
118pub fn parse_auto(data: &[u8], filename: Option<&str>) -> Vec<HistoryEntry> {
119    parse(data, detect(data, filename))
120}
121
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strip_bom_removes_only_a_leading_bom() {
        let with_bom = b"\xEF\xBB\xBFhi";
        let without_bom = b"hi";

        assert_eq!(strip_bom(with_bom), without_bom);
        assert_eq!(strip_bom(without_bom), without_bom);
    }

    #[test]
    fn detect_zsh_by_extended_line() {
        let history = b": 1700000000:0;ls";
        assert_eq!(detect(history, None), Shell::Zsh);
    }

    #[test]
    fn detect_bash_by_timestamp_line() {
        let history = b"#1700000000\nls\n";
        assert_eq!(detect(history, None), Shell::Bash);
    }

    #[test]
    fn detect_fish_by_cmd_record() {
        let history = b"- cmd: ls\n  when: 1700000000\n";
        assert_eq!(detect(history, None), Shell::Fish);
    }

    #[test]
    fn detect_powershell_by_filename_when_content_is_plain() {
        // Nothing in the content gives the format away; only the name does.
        let history = b"Get-Process\nls\n";
        let detected = detect(history, Some("ConsoleHost_history.txt"));
        assert_eq!(detected, Shell::PowerShell);
    }

    #[test]
    fn detect_falls_back_to_unknown_for_plain_unnamed() {
        let history = b"ls\ncd /tmp\n";
        assert_eq!(detect(history, None), Shell::Unknown);
    }

    #[test]
    fn parse_auto_unknown_is_plain_lines() {
        let entries = parse_auto(b"ls\ncd /tmp\n", None);

        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].command, "ls");
        assert_eq!(entries[1].timestamp, None);
    }
}