Skip to main content

mur_common/muragent/
reader.rs

1//! `.muragent` reader — extract and inspect a signed agent package.
2
3use crate::muragent::MuragentError;
4use flate2::read::GzDecoder;
5use std::collections::BTreeMap;
6use std::io::Read;
7use std::path::Path;
8use tar::Archive;
9
// Resource bounds for an untrusted `.muragent`.
//
// A `.muragent` is a tar.gz that may come from an untrusted source (a friend,
// a download). Without limits, a tiny archive could decompress into unbounded
// memory (a gzip/tar bomb). The caps below are generous for a real agent
// bundle (manifest + a few icons/skills) yet keep a malicious package from
// OOM-ing the host on import.

/// Maximum number of tar entries accepted from one archive.
const MAX_ENTRIES: usize = 10_000;
/// Maximum decompressed size of a single file: 64 MiB.
const MAX_FILE_BYTES: u64 = 64 << 20;
/// Maximum decompressed size summed over all files: 256 MiB.
const MAX_TOTAL_BYTES: u64 = 256 << 20;
18
/// In-memory contents of an extracted `.muragent` package.
///
/// Holds every stored file as raw bytes, keyed by its path inside the
/// tarball. The `BTreeMap` keeps the paths in sorted order.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MuragentArchive {
    /// All files in the tarball keyed by path → raw bytes.
    pub files: BTreeMap<String, Vec<u8>>,
}
24
25impl MuragentArchive {
26    /// Read and extract all files from a `.muragent` tar.gz.
27    pub fn read(path: &Path) -> Result<Self, MuragentError> {
28        Self::read_with_limits(path, MAX_ENTRIES, MAX_FILE_BYTES, MAX_TOTAL_BYTES)
29    }
30
31    /// Implementation of [`read`](Self::read) with explicit resource caps, so
32    /// the bomb defenses can be exercised with small limits in tests.
33    fn read_with_limits(
34        path: &Path,
35        max_entries: usize,
36        max_file_bytes: u64,
37        max_total_bytes: u64,
38    ) -> Result<Self, MuragentError> {
39        let file = std::fs::File::open(path).map_err(MuragentError::Io)?;
40        let gz = GzDecoder::new(file);
41        let mut archive = Archive::new(gz);
42        let mut files = BTreeMap::new();
43        let mut entry_count = 0usize;
44        let mut total_bytes = 0u64;
45
46        for entry in archive
47            .entries()
48            .map_err(|e| MuragentError::Other(format!("tar entries: {e}")))?
49        {
50            entry_count += 1;
51            if entry_count > max_entries {
52                return Err(MuragentError::Other(format!(
53                    "too many entries in .muragent (>{max_entries})"
54                )));
55            }
56            let mut entry = entry.map_err(|e| MuragentError::Other(format!("tar entry: {e}")))?;
57
58            let entry_path = entry
59                .path()
60                .map_err(|e| MuragentError::Other(format!("entry path: {e}")))?
61                .to_str()
62                .ok_or_else(|| MuragentError::Other("non-UTF-8 path in tarball".into()))?
63                .to_string();
64
65            let entry_type = entry.header().entry_type();
66            if entry_type == tar::EntryType::Symlink || entry_type == tar::EntryType::Link {
67                return Err(MuragentError::ExecutableContent(format!(
68                    "symlinks not allowed in .muragent: {entry_path}"
69                )));
70            }
71
72            if entry_type != tar::EntryType::Regular
73                && entry_type != tar::EntryType::Directory
74                && entry_type != tar::EntryType::GNULongName
75                && entry_type != tar::EntryType::GNULongLink
76            {
77                return Err(MuragentError::ExecutableContent(format!(
78                    "tar entry type {:?} not allowed: {entry_path}",
79                    entry_type
80                )));
81            }
82
83            // Skip directories — we don't need them in the map
84            if entry_type == tar::EntryType::Directory {
85                continue;
86            }
87
88            crate::muragent::jcs_canonical::validate_tarball_path(&entry_path)
89                .map_err(|e| MuragentError::Other(e.to_string()))?;
90
91            // Check mode bits — regular files must not be executable
92            let mode = entry.header().mode().unwrap_or(0o644);
93            crate::muragent::executable_ban::check_mode_bits(mode, false)
94                .map_err(MuragentError::ExecutableContent)?;
95
96            // Read with a per-file cap (read one byte past the limit to detect
97            // overflow), then enforce the running decompressed-total cap. This
98            // is what actually stops a gzip/tar bomb — the header size field is
99            // attacker-controlled and cannot be trusted.
100            let mut data = Vec::new();
101            entry
102                .by_ref()
103                .take(max_file_bytes + 1)
104                .read_to_end(&mut data)
105                .map_err(MuragentError::Io)?;
106            if data.len() as u64 > max_file_bytes {
107                return Err(MuragentError::Other(format!(
108                    "file exceeds {max_file_bytes} bytes in .muragent: {entry_path}"
109                )));
110            }
111            total_bytes += data.len() as u64;
112            if total_bytes > max_total_bytes {
113                return Err(MuragentError::Other(format!(
114                    "decompressed .muragent exceeds {max_total_bytes} bytes total"
115                )));
116            }
117
118            files.insert(entry_path, data);
119        }
120
121        Ok(Self { files })
122    }
123
124    pub fn get(&self, path: &str) -> Option<&[u8]> {
125        self.files.get(path).map(|v| v.as_slice())
126    }
127
128    pub fn get_str(&self, path: &str) -> Result<&str, MuragentError> {
129        let bytes = self
130            .get(path)
131            .ok_or_else(|| MuragentError::Other(format!("file not found: {path}")))?;
132        std::str::from_utf8(bytes)
133            .map_err(|e| MuragentError::Other(format!("{path} is not valid UTF-8: {e}")))
134    }
135
136    pub fn files_as_vec(&self) -> Vec<(String, Vec<u8>)> {
137        self.files
138            .iter()
139            .map(|(k, v)| (k.clone(), v.clone()))
140            .collect()
141    }
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `.muragent`-shaped tar.gz from (path, bytes) pairs and write
    /// it into a fresh temporary directory.
    ///
    /// Returns the directory guard together with the archive path. The caller
    /// must keep the guard alive while the path is in use; dropping it removes
    /// the directory, so test runs no longer leave temp dirs behind.
    fn make_targz(files: &[(&str, &[u8])]) -> (tempfile::TempDir, std::path::PathBuf) {
        let mut builder = tar::Builder::new(flate2::write::GzEncoder::new(
            Vec::new(),
            flate2::Compression::fast(),
        ));
        for &(name, data) in files {
            let mut header = tar::Header::new_gnu();
            header.set_size(data.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            builder.append_data(&mut header, name, data).unwrap();
        }
        let gz = builder.into_inner().unwrap().finish().unwrap();
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("t.muragent");
        std::fs::write(&path, &gz).unwrap();
        (dir, path)
    }

    #[test]
    fn within_limits_reads_ok() {
        let (_dir, p) = make_targz(&[("a.txt", b"hello"), ("b.txt", b"world")]);
        let arc = MuragentArchive::read_with_limits(&p, 10, 1024, 4096).unwrap();
        assert_eq!(arc.files.len(), 2);
    }

    #[test]
    fn accepts_file_exactly_at_limits() {
        // Both caps are inclusive: a file of exactly the limit must pass.
        let (_dir, p) = make_targz(&[("edge.bin", &vec![0u8; 100])]);
        let arc = MuragentArchive::read_with_limits(&p, 1, 100, 100).unwrap();
        assert_eq!(arc.get("edge.bin").map(|b| b.len()), Some(100));
    }

    #[test]
    fn accessors_return_stored_content() {
        let (_dir, p) = make_targz(&[("a.txt", b"hello")]);
        let arc = MuragentArchive::read_with_limits(&p, 10, 1024, 4096).unwrap();
        assert_eq!(arc.get("a.txt"), Some(&b"hello"[..]));
        assert_eq!(arc.get_str("a.txt").unwrap(), "hello");
        assert!(arc.get("missing.txt").is_none());
        assert!(arc.get_str("missing.txt").is_err());
    }

    #[test]
    fn rejects_oversized_single_file() {
        let (_dir, p) = make_targz(&[("big.bin", &vec![0u8; 200])]);
        let err = MuragentArchive::read_with_limits(&p, 10, 100, 1_000_000).unwrap_err();
        assert!(format!("{err}").contains("exceeds"), "got: {err}");
    }

    #[test]
    fn rejects_oversized_total() {
        let (_dir, p) = make_targz(&[("a.bin", &vec![0u8; 100]), ("b.bin", &vec![0u8; 100])]);
        let err = MuragentArchive::read_with_limits(&p, 10, 1024, 150).unwrap_err();
        assert!(format!("{err}").contains("total"), "got: {err}");
    }

    #[test]
    fn rejects_too_many_entries() {
        let (_dir, p) = make_targz(&[("a", b"1"), ("b", b"2"), ("c", b"3")]);
        let err = MuragentArchive::read_with_limits(&p, 2, 1024, 4096).unwrap_err();
        assert!(format!("{err}").contains("too many entries"), "got: {err}");
    }
}