mur_common/muragent/
reader.rs1use crate::muragent::MuragentError;
4use flate2::read::GzDecoder;
5use std::collections::BTreeMap;
6use std::io::Read;
7use std::path::Path;
8use tar::Archive;
9
/// Default cap on the number of tar entries accepted by [`MuragentArchive::read`].
const MAX_ENTRIES: usize = 10_000;

/// Default cap on the decompressed size of a single file (64 MiB).
const MAX_FILE_BYTES: u64 = 64 * 1024 * 1024;

/// Default cap on the summed decompressed size of all files (256 MiB).
const MAX_TOTAL_BYTES: u64 = 256 * 1024 * 1024;

/// In-memory contents of a `.muragent` archive.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MuragentArchive {
    /// File contents keyed by the path recorded in the tarball; the `BTreeMap`
    /// keeps the paths in sorted order.
    pub files: BTreeMap<String, Vec<u8>>,
}
24
25impl MuragentArchive {
26 pub fn read(path: &Path) -> Result<Self, MuragentError> {
28 Self::read_with_limits(path, MAX_ENTRIES, MAX_FILE_BYTES, MAX_TOTAL_BYTES)
29 }
30
31 fn read_with_limits(
34 path: &Path,
35 max_entries: usize,
36 max_file_bytes: u64,
37 max_total_bytes: u64,
38 ) -> Result<Self, MuragentError> {
39 let file = std::fs::File::open(path).map_err(MuragentError::Io)?;
40 let gz = GzDecoder::new(file);
41 let mut archive = Archive::new(gz);
42 let mut files = BTreeMap::new();
43 let mut entry_count = 0usize;
44 let mut total_bytes = 0u64;
45
46 for entry in archive
47 .entries()
48 .map_err(|e| MuragentError::Other(format!("tar entries: {e}")))?
49 {
50 entry_count += 1;
51 if entry_count > max_entries {
52 return Err(MuragentError::Other(format!(
53 "too many entries in .muragent (>{max_entries})"
54 )));
55 }
56 let mut entry = entry.map_err(|e| MuragentError::Other(format!("tar entry: {e}")))?;
57
58 let entry_path = entry
59 .path()
60 .map_err(|e| MuragentError::Other(format!("entry path: {e}")))?
61 .to_str()
62 .ok_or_else(|| MuragentError::Other("non-UTF-8 path in tarball".into()))?
63 .to_string();
64
65 let entry_type = entry.header().entry_type();
66 if entry_type == tar::EntryType::Symlink || entry_type == tar::EntryType::Link {
67 return Err(MuragentError::ExecutableContent(format!(
68 "symlinks not allowed in .muragent: {entry_path}"
69 )));
70 }
71
72 if entry_type != tar::EntryType::Regular
73 && entry_type != tar::EntryType::Directory
74 && entry_type != tar::EntryType::GNULongName
75 && entry_type != tar::EntryType::GNULongLink
76 {
77 return Err(MuragentError::ExecutableContent(format!(
78 "tar entry type {:?} not allowed: {entry_path}",
79 entry_type
80 )));
81 }
82
83 if entry_type == tar::EntryType::Directory {
85 continue;
86 }
87
88 crate::muragent::jcs_canonical::validate_tarball_path(&entry_path)
89 .map_err(|e| MuragentError::Other(e.to_string()))?;
90
91 let mode = entry.header().mode().unwrap_or(0o644);
93 crate::muragent::executable_ban::check_mode_bits(mode, false)
94 .map_err(MuragentError::ExecutableContent)?;
95
96 let mut data = Vec::new();
101 entry
102 .by_ref()
103 .take(max_file_bytes + 1)
104 .read_to_end(&mut data)
105 .map_err(MuragentError::Io)?;
106 if data.len() as u64 > max_file_bytes {
107 return Err(MuragentError::Other(format!(
108 "file exceeds {max_file_bytes} bytes in .muragent: {entry_path}"
109 )));
110 }
111 total_bytes += data.len() as u64;
112 if total_bytes > max_total_bytes {
113 return Err(MuragentError::Other(format!(
114 "decompressed .muragent exceeds {max_total_bytes} bytes total"
115 )));
116 }
117
118 files.insert(entry_path, data);
119 }
120
121 Ok(Self { files })
122 }
123
124 pub fn get(&self, path: &str) -> Option<&[u8]> {
125 self.files.get(path).map(|v| v.as_slice())
126 }
127
128 pub fn get_str(&self, path: &str) -> Result<&str, MuragentError> {
129 let bytes = self
130 .get(path)
131 .ok_or_else(|| MuragentError::Other(format!("file not found: {path}")))?;
132 std::str::from_utf8(bytes)
133 .map_err(|e| MuragentError::Other(format!("{path} is not valid UTF-8: {e}")))
134 }
135
136 pub fn files_as_vec(&self) -> Vec<(String, Vec<u8>)> {
137 self.files
138 .iter()
139 .map(|(k, v)| (k.clone(), v.clone()))
140 .collect()
141 }
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes a gzip-compressed tarball containing `files` (name, contents) into
    /// a fresh temporary directory.
    ///
    /// Returns the directory guard together with the archive path; the caller
    /// must keep the guard alive while using the path, and the directory is
    /// removed once the guard is dropped.
    fn make_targz(files: &[(&str, &[u8])]) -> (tempfile::TempDir, std::path::PathBuf) {
        let mut builder = tar::Builder::new(flate2::write::GzEncoder::new(
            Vec::new(),
            flate2::Compression::fast(),
        ));
        for (name, data) in files {
            let mut header = tar::Header::new_gnu();
            header.set_size(data.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            builder.append_data(&mut header, name, *data).unwrap();
        }
        let gz = builder.into_inner().unwrap().finish().unwrap();
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("t.muragent");
        std::fs::write(&path, &gz).unwrap();
        (dir, path)
    }

    #[test]
    fn within_limits_reads_ok() {
        let (_dir, p) = make_targz(&[("a.txt", b"hello"), ("b.txt", b"world")]);
        let arc = MuragentArchive::read_with_limits(&p, 10, 1024, 4096).unwrap();
        assert_eq!(arc.files.len(), 2);
    }

    #[test]
    fn rejects_oversized_single_file() {
        let (_dir, p) = make_targz(&[("big.bin", &[0u8; 200])]);
        let err = MuragentArchive::read_with_limits(&p, 10, 100, 1_000_000).unwrap_err();
        assert!(format!("{err}").contains("exceeds"), "got: {err}");
    }

    #[test]
    fn rejects_oversized_total() {
        let (_dir, p) = make_targz(&[("a.bin", &[0u8; 100]), ("b.bin", &[0u8; 100])]);
        let err = MuragentArchive::read_with_limits(&p, 10, 1024, 150).unwrap_err();
        assert!(format!("{err}").contains("total"), "got: {err}");
    }

    #[test]
    fn rejects_too_many_entries() {
        let (_dir, p) = make_targz(&[("a", b"1"), ("b", b"2"), ("c", b"3")]);
        let err = MuragentArchive::read_with_limits(&p, 2, 1024, 4096).unwrap_err();
        assert!(format!("{err}").contains("too many entries"), "got: {err}");
    }
}