1use std::{
2 collections::BTreeMap,
3 fs::File,
4 io::{Read, Seek},
5 path::Path,
6 sync::OnceLock,
7};
8
9use bytes::Buf;
10use sha2::Digest;
11use shared_buffer::OwnedBuffer;
12
13use crate::{
14 DetectError, Magic, Version,
15 metadata::Manifest,
16 v3::{
17 Index, Span, Tag,
18 read::{
19 AtomsSection, ManifestSection, Section, SectionError, VolumeSection,
20 dir_entry::DirEntryError, scanner::InvalidSize, sections::SectionConversionError,
21 },
22 },
23};
24
/// A reader for a WEBC v3 file that owns the bytes it was parsed from.
///
/// The index, manifest and atoms are decoded up front by
/// [`OwnedReader::parse`]; volume sections are decoded on request from the
/// retained buffer.
#[derive(Debug, Clone, PartialEq)]
pub struct OwnedReader {
    /// The complete file contents that were handed to [`OwnedReader::parse`].
    buffer: OwnedBuffer,
    /// The decoded index section; its spans locate the other sections inside
    /// `buffer`.
    index: Index,
    /// The manifest decoded from the manifest section.
    manifest: Manifest,
    /// The hash reported by the atoms section itself.
    atoms_hash: [u8; 32],
    /// Every atom, keyed by name, stored as `(hash, contents)`.
    atoms: BTreeMap<String, ([u8; 32], OwnedBuffer)>,
    /// SHA-256 of `buffer`, filled in on the first call to
    /// [`OwnedReader::webc_hash`].
    hash: OnceLock<[u8; 32]>,
}
35
36impl OwnedReader {
37 pub fn parse(webc: impl Into<OwnedBuffer>) -> Result<Self, OwnedReaderError> {
38 let webc: OwnedBuffer = webc.into();
39
40 let version = crate::detect(webc.clone().reader())?;
42 if version != Version::V3 {
43 return Err(OwnedReaderError::UnsupportedVersion(version));
44 }
45 let index = read_index(webc.clone())?;
46
47 let manifest =
50 parse_section(&webc, index.manifest.span).and_then(|section: ManifestSection| {
51 section.manifest().map_err(OwnedReaderError::Manifest)
52 })?;
53 let atoms_section: AtomsSection = parse_section(&webc, index.atoms.span)?;
54 let atoms = atoms_section
55 .iter()
56 .map(|result| result.map(|(s, h, b)| (s.to_string(), (h, b))))
57 .collect::<Result<BTreeMap<String, ([u8; 32], OwnedBuffer)>, DirEntryError>>()
58 .map_err(OwnedReaderError::Atoms)?;
59
60 Ok(OwnedReader {
61 buffer: webc,
62 index,
63 atoms_hash: *atoms_section.get_hash(),
64 atoms,
65 manifest,
66 hash: OnceLock::new(),
67 })
68 }
69
70 pub fn from_path(path: impl AsRef<Path>) -> Result<Self, OwnedReaderError> {
71 let buffer = OwnedBuffer::mmap(path.as_ref())?;
72 OwnedReader::parse(buffer)
73 }
74
75 pub fn from_file(mut file: File) -> Result<Self, OwnedReaderError> {
80 if let Ok(buffer) = OwnedBuffer::from_file(&file) {
81 return OwnedReader::parse(buffer);
82 }
83
84 file.rewind().map_err(OwnedReaderError::Io)?;
86 let mut contents = Vec::new();
87 file.read_to_end(&mut contents)
88 .map_err(OwnedReaderError::Io)?;
89
90 OwnedReader::parse(contents)
91 }
92
93 pub fn webc_hash(&self) -> Option<[u8; 32]> {
94 Some(
95 *self
96 .hash
97 .get_or_init(|| sha2::Sha256::digest(self.buffer.as_slice()).into()),
98 )
99 }
100
101 pub fn manifest(&self) -> &Manifest {
102 &self.manifest
103 }
104
105 pub fn index(&self) -> &Index {
106 &self.index
107 }
108
109 pub fn atoms_hash(&self) -> [u8; 32] {
110 self.atoms_hash
111 }
112
113 pub fn atom_names(&self) -> impl Iterator<Item = &str> + '_ {
114 self.atoms.keys().map(|s| s.as_str())
115 }
116
117 pub fn iter_atoms(&self) -> impl Iterator<Item = (&str, [u8; 32], &OwnedBuffer)> + '_ {
118 self.atoms.iter().map(|(s, (h, b))| (s.as_str(), *h, b))
119 }
120
121 pub fn get_atom(&self, name: &str) -> Option<&([u8; 32], OwnedBuffer)> {
122 self.atoms.get(name)
123 }
124
125 pub fn volume_names(&self) -> impl Iterator<Item = &str> + '_ {
126 self.index.volumes.keys().map(|s| s.as_str())
127 }
128
129 pub fn iter_volumes(
130 &self,
131 ) -> impl Iterator<Item = Result<(&str, VolumeSection), OwnedReaderError>> {
132 self.index.volumes.iter().map(|(name, entry)| {
133 let volume: VolumeSection = parse_section(&self.buffer, entry.span)?;
134 Ok((name.as_str(), volume))
135 })
136 }
137
138 pub fn get_volume(&self, name: &str) -> Result<VolumeSection, OwnedReaderError> {
139 let entry = self
140 .index
141 .volumes
142 .get(name)
143 .ok_or_else(|| OwnedReaderError::NoSuchVolume {
144 name: name.to_string(),
145 })?;
146
147 parse_section(&self.buffer, entry.span)
148 }
149}
150
151fn parse_section<T>(buffer: &OwnedBuffer, span: Span) -> Result<T, OwnedReaderError>
152where
153 T: TryFrom<Section, Error = SectionConversionError>,
154{
155 let (tag, hash, data) = get_section(buffer, span)?;
156
157 let section = Section::parse(tag, Some(hash), data.clone())
158 .map_err(|error| OwnedReaderError::Section { error, tag, data })?;
159
160 T::try_from(section).map_err(OwnedReaderError::from)
161}
162
163fn get_section(
164 buffer: &OwnedBuffer,
165 span: Span,
166) -> Result<(u8, [u8; 32], OwnedBuffer), OwnedReaderError> {
167 get(buffer, span).and_then(read_raw_section)
168}
169
170fn get(buffer: &OwnedBuffer, span: Span) -> Result<OwnedBuffer, OwnedReaderError> {
171 if buffer.len() < span.end() {
172 Err(OwnedReaderError::IndexOutOfBounds {
173 offset: span.end(),
174 bytes_available: buffer.len(),
175 })
176 } else {
177 Ok(buffer.slice(span.start..span.end()))
178 }
179}
180
181fn read_raw_index_section(mut buffer: OwnedBuffer) -> Result<(u8, OwnedBuffer), OwnedReaderError> {
182 const TAG_AND_LEN: usize = std::mem::size_of::<u8>() + std::mem::size_of::<u64>();
183
184 if buffer.len() < TAG_AND_LEN {
185 return Err(OwnedReaderError::Io(std::io::Error::from(
186 std::io::ErrorKind::UnexpectedEof,
187 )));
188 }
189
190 let tag = buffer.get_u8();
191 let length: usize = buffer.get_u64_le().try_into()?;
192
193 if buffer.len() < length {
194 return Err(OwnedReaderError::Io(std::io::Error::from(
195 std::io::ErrorKind::UnexpectedEof,
196 )));
197 }
198
199 let data = buffer.slice(..length);
200 buffer.advance(length);
201
202 Ok((tag, data))
203}
204
205fn read_raw_section(
206 mut buffer: OwnedBuffer,
207) -> Result<(u8, [u8; 32], OwnedBuffer), OwnedReaderError> {
208 const TAG_AND_LEN: usize = std::mem::size_of::<u8>() + 32 + std::mem::size_of::<u64>();
209
210 if buffer.len() < TAG_AND_LEN {
211 return Err(OwnedReaderError::Io(std::io::Error::from(
212 std::io::ErrorKind::UnexpectedEof,
213 )));
214 }
215
216 let tag = buffer.get_u8();
217 let mut hash = [0u8; 32];
218 buffer.copy_to_slice(&mut hash);
219 let length: usize = buffer.get_u64_le().try_into()?;
220
221 if buffer.len() < length {
222 return Err(OwnedReaderError::Io(std::io::Error::from(
223 std::io::ErrorKind::UnexpectedEof,
224 )));
225 }
226
227 let data = buffer.slice(..length);
228 buffer.advance(length);
229
230 Ok((tag, hash, data))
231}
232
233fn read_index(mut webc: OwnedBuffer) -> Result<Index, OwnedReaderError> {
234 const HEADER_LENGTH: usize = std::mem::size_of::<Magic>() + std::mem::size_of::<Version>();
236 webc.advance(HEADER_LENGTH);
237
238 let (tag, data) = read_raw_index_section(webc)?;
239
240 match Section::parse(tag, None, data.clone()) {
241 Ok(Section::Index(index_reader)) => {
242 let index = index_reader.index().map_err(OwnedReaderError::Index)?;
243 Ok(index)
244 }
245 Ok(_) => Err(OwnedReaderError::UnexpectedSection {
246 expected_tag: Tag::Index,
247 actual_tag: tag,
248 offset: HEADER_LENGTH,
249 }),
250 Err(error) => Err(OwnedReaderError::Section { error, tag, data }),
251 }
252}
253
/// Errors that can occur while constructing or querying an [`OwnedReader`].
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum OwnedReaderError {
    /// An I/O failure. Also produced with `ErrorKind::UnexpectedEof` when a
    /// section header or payload is shorter than declared.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// The file's magic bytes were invalid.
    #[error("Invalid magic bytes, {}", _0.escape_ascii())]
    InvalidMagic(Magic),
    /// The file was detected as a version other than the one this reader
    /// handles.
    #[error("The version, {_0}, isn't supported")]
    UnsupportedVersion(Version),
    /// A section decoded successfully but had a different tag than the one
    /// required at that position.
    #[error("Expected to find a {expected_tag} at offset {offset:#x}, but found a \"{}\"", Tag::display(*actual_tag))]
    UnexpectedSection {
        /// The tag that was required.
        expected_tag: Tag,
        /// The raw tag byte that was actually read.
        actual_tag: u8,
        /// Byte offset in the file at which the section was expected.
        offset: usize,
    },
    /// A span (or an expected size) extended past the bytes available.
    #[error(
        "Tried to access memory at offset {offset}, but only {bytes_available} bytes are available"
    )]
    IndexOutOfBounds {
        /// The offset that was requested.
        offset: usize,
        /// How many bytes were actually available.
        bytes_available: usize,
    },
    /// The index section's payload could not be deserialized.
    #[error("Unable to parse the index as CBOR")]
    Index(ciborium::de::Error<std::io::Error>),
    /// The manifest section's payload could not be deserialized.
    #[error("Unable to parse the manifest as CBOR")]
    Manifest(ciborium::de::Error<std::io::Error>),
    /// A section's raw bytes could not be decoded into a section.
    #[error("Unable to decode a section")]
    Section {
        /// The underlying decoding error.
        #[source]
        error: SectionError,
        /// The raw tag byte of the section that failed to decode.
        tag: u8,
        /// The raw payload of the section that failed to decode.
        data: OwnedBuffer,
    },
    /// A section decoded successfully but could not be converted into the
    /// section type that was requested.
    #[error("Found the wrong section")]
    IncorrectSection(#[from] SectionConversionError),
    /// The index has no volume with the requested name.
    #[error("Volume not found: \"{name}\"")]
    NoSuchVolume { name: String },
    /// An entry in the atoms section could not be read.
    #[error("Unable to determine the atoms")]
    Atoms(DirEntryError),
    /// Version detection on the file failed.
    #[error("Unable to detect the WEBC file's version number")]
    Detect(#[from] DetectError),
    /// A `shared_buffer` memory-mapping error.
    #[error(transparent)]
    Mmap(#[from] shared_buffer::MmapError),
    /// A length read from the file did not fit into a `usize`.
    #[error(transparent)]
    IntegerConversion(#[from] std::num::TryFromIntError),
}
301
302impl From<InvalidSize> for OwnedReaderError {
303 fn from(value: InvalidSize) -> Self {
304 let InvalidSize { expected, actual } = value;
305 OwnedReaderError::IndexOutOfBounds {
306 offset: expected,
307 bytes_available: actual,
308 }
309 }
310}