chmlib/
lib.rs

1use std::{
2    any::Any,
3    error::Error,
4    ffi::CString,
5    fmt::{self, Debug, Formatter},
6    mem::{ManuallyDrop, MaybeUninit},
7    os::raw::{c_int, c_void},
8    panic,
9    path::Path,
10    ptr::NonNull,
11};
12use thiserror::Error;
13
14#[derive(Debug)]
15pub struct ChmFile {
16    raw: NonNull<chmlib_sys::chmFile>,
17}
18
19impl ChmFile {
20    /// Open a [`ChmFile`] from the file system.
21    pub fn open<P: AsRef<Path>>(path: P) -> Result<ChmFile, OpenError> {
22        let c_path = path_to_cstring(path.as_ref())?;
23
24        unsafe {
25            let raw = chmlib_sys::chm_open(c_path.as_ptr());
26            match NonNull::new(raw) {
27                Some(raw) => Ok(ChmFile { raw }),
28                None => Err(OpenError::Other),
29            }
30        }
31    }
32
33    /// Find a particular object in the archive.
34    pub fn find<P: AsRef<Path>>(&mut self, path: P) -> Option<UnitInfo> {
35        let path = path_to_cstring(path.as_ref()).ok()?;
36
37        unsafe {
38            // put an uninitialized chmUnitInfo on the stack
39            let mut resolved = MaybeUninit::<chmlib_sys::chmUnitInfo>::uninit();
40
41            // then try to resolve the unit info
42            let ret = chmlib_sys::chm_resolve_object(
43                self.raw.as_ptr(),
44                path.as_ptr(),
45                resolved.as_mut_ptr(),
46            );
47
48            if ret == chmlib_sys::CHM_RESOLVE_SUCCESS as i32 {
49                // if successful, "resolved" would have been initialized by C
50                Some(UnitInfo::from_raw(resolved.assume_init()))
51            } else {
52                None
53            }
54        }
55    }
56
57    /// Inspect each item within the [`ChmFile`].
58    pub fn for_each<F, C>(
59        &mut self,
60        filter: Filter,
61        cb: F,
62    ) -> Result<(), EnumerationError>
63    where
64        F: FnMut(&mut ChmFile, UnitInfo) -> C,
65        C: Into<Continuation>,
66    {
67        unsafe {
68            let mut state = WrapperState::new(cb);
69            let ret = chmlib_sys::chm_enumerate(
70                self.raw.as_ptr(),
71                filter.bits(),
72                Some(function_wrapper::<F, C>),
73                &mut state as *mut _ as *mut c_void,
74            );
75            handle_enumeration_result(state, ret)
76        }
77    }
78
79    /// Inspect each item within the [`ChmFile`] inside a specified directory.
80    pub fn for_each_item_in_dir<F, C, P>(
81        &mut self,
82        filter: Filter,
83        prefix: P,
84        cb: F,
85    ) -> Result<(), EnumerationError>
86    where
87        P: AsRef<Path>,
88        F: FnMut(&mut ChmFile, UnitInfo) -> C,
89        C: Into<Continuation>,
90    {
91        let path = path_to_cstring(prefix.as_ref())
92            .map_err(EnumerationError::InvalidPrefix)?;
93
94        unsafe {
95            let mut state = WrapperState::new(cb);
96            let ret = chmlib_sys::chm_enumerate_dir(
97                self.raw.as_ptr(),
98                path.as_ptr(),
99                filter.bits(),
100                Some(function_wrapper::<F, C>),
101                &mut state as *mut _ as *mut c_void,
102            );
103            handle_enumeration_result(state, ret)
104        }
105    }
106
107    pub fn read(
108        &mut self,
109        unit: &UnitInfo,
110        offset: u64,
111        buffer: &mut [u8],
112    ) -> Result<usize, ReadError> {
113        let mut unit = unit.0.clone();
114
115        let bytes_written = unsafe {
116            chmlib_sys::chm_retrieve_object(
117                self.raw.as_ptr(),
118                &mut unit,
119                buffer.as_mut_ptr(),
120                offset,
121                buffer.len() as _,
122            )
123        };
124
125        if bytes_written >= 0 {
126            Ok(bytes_written as usize)
127        } else {
128            Err(ReadError)
129        }
130    }
131}
132
133fn handle_enumeration_result<F>(
134    state: WrapperState<F>,
135    ret: c_int,
136) -> Result<(), EnumerationError> {
137    if let Some(panic) = state.panic {
138        panic::resume_unwind(panic)
139    } else if let Some(err) = state.error {
140        Err(EnumerationError::User(err))
141    } else if ret < 0 {
142        Err(EnumerationError::Internal)
143    } else {
144        Ok(())
145    }
146}
147
/// Everything the enumeration callback needs to reach through CHMLib's opaque
/// context pointer.
struct WrapperState<F> {
    /// The user-provided callback.
    closure: F,
    /// The error the callback bailed with, if any.
    error: Option<Box<dyn Error + 'static>>,
    /// The payload of a panic caught while running the callback, if any.
    panic: Option<Box<dyn Any + Send + 'static>>,
}
153
154impl<F> WrapperState<F> {
155    fn new(closure: F) -> WrapperState<F> {
156        WrapperState {
157            closure,
158            error: None,
159            panic: None,
160        }
161    }
162}
163
164unsafe extern "C" fn function_wrapper<F, C>(
165    file: *mut chmlib_sys::chmFile,
166    unit: *mut chmlib_sys::chmUnitInfo,
167    state: *mut c_void,
168) -> c_int
169where
170    F: FnMut(&mut ChmFile, UnitInfo) -> C,
171    C: Into<Continuation>,
172{
173    // we need to make sure panics can't escape across the FFI boundary.
174    let result = panic::catch_unwind(|| {
175        // Use ManuallyDrop because we want to give the caller a `&mut ChmFile`
176        // but want to make sure the destructor is never called (to
177        // prevent double-frees).
178        let mut file = ManuallyDrop::new(ChmFile {
179            raw: NonNull::new_unchecked(file),
180        });
181        let unit = UnitInfo::from_raw(unit.read());
182        // the opaque state pointer is guaranteed to point to an instance of our
183        // closure
184        let state = &mut *(state as *mut WrapperState<F>);
185        (state.closure)(&mut file, unit)
186    });
187
188    let mut state = &mut *(state as *mut WrapperState<F>);
189
190    match result.map(Into::into) {
191        Ok(Continuation::Continue) => {
192            chmlib_sys::CHM_ENUMERATOR_CONTINUE as c_int
193        },
194        Ok(Continuation::Failure(err)) => {
195            state.error = Some(err);
196            chmlib_sys::CHM_ENUMERATOR_FAILURE as c_int
197        },
198        Ok(Continuation::Stop) => chmlib_sys::CHM_ENUMERATOR_SUCCESS as c_int,
199        Err(panic) => {
200            state.panic = Some(panic);
201            chmlib_sys::CHM_ENUMERATOR_FAILURE as c_int
202        },
203    }
204}
205
206impl Drop for ChmFile {
207    fn drop(&mut self) {
208        unsafe {
209            chmlib_sys::chm_close(self.raw.as_ptr());
210        }
211    }
212}
213
214bitflags::bitflags! {
215    pub struct Filter: c_int {
216        /// A normal file.
217        const NORMAL = chmlib_sys::CHM_ENUMERATE_NORMAL as c_int;
218        /// A meta file (typically used by the CHM system).
219        const META = chmlib_sys::CHM_ENUMERATE_META as c_int;
220        /// A special file (starts with `#` or `$`).
221        const SPECIAL = chmlib_sys::CHM_ENUMERATE_SPECIAL as c_int;
222        /// It's a file.
223        const FILES = chmlib_sys::CHM_ENUMERATE_FILES as c_int;
224        /// It's a directory.
225        const DIRS = chmlib_sys::CHM_ENUMERATE_DIRS as c_int;
226    }
227}
228
/// What an enumeration callback wants to happen next.
pub enum Continuation {
    /// Keep going and visit the next item.
    Continue,
    /// Abort the iteration and report the contained error.
    Failure(Box<dyn Error + 'static>),
    /// End the iteration early without reporting an error (e.g. the callback
    /// already found what it was looking for).
    Stop,
}
238
239impl From<()> for Continuation {
240    fn from(_: ()) -> Continuation { Continuation::Continue }
241}
242
243impl<E: Into<Box<dyn Error + 'static>>> From<Result<(), E>> for Continuation {
244    fn from(other: Result<(), E>) -> Continuation {
245        match other {
246            Ok(_) => Continuation::Continue,
247            Err(e) => Continuation::Failure(e.into()),
248        }
249    }
250}
251
252#[repr(transparent)]
253pub struct UnitInfo(chmlib_sys::chmUnitInfo);
254
255impl UnitInfo {
256    fn from_raw(ui: chmlib_sys::chmUnitInfo) -> UnitInfo { UnitInfo(ui) }
257
258    fn flags(&self) -> Filter { Filter::from_bits_truncate(self.0.flags) }
259
260    pub fn is_normal(&self) -> bool { self.flags().contains(Filter::NORMAL) }
261
262    pub fn is_special(&self) -> bool { self.flags().contains(Filter::SPECIAL) }
263
264    pub fn is_meta(&self) -> bool { self.flags().contains(Filter::META) }
265
266    pub fn is_file(&self) -> bool { self.flags().contains(Filter::FILES) }
267
268    pub fn is_dir(&self) -> bool { self.flags().contains(Filter::DIRS) }
269
270    pub fn space(&self) -> c_int { self.0.space }
271
272    /// The starting position within the underlying file.
273    pub fn start(&self) -> u64 { self.0.start }
274
275    /// The number of bytes in this item.
276    pub fn length(&self) -> u64 { self.0.length }
277
278    /// The item's filename.
279    ///
280    /// # Security
281    ///
282    /// This path is provided by the original CHM file's author. It is the
283    /// caller's responsibility to handle malicious input (e.g.
284    /// `/../../../etc/passwd`).
285    pub fn path(&self) -> Option<&Path> {
286        let end = self
287            .0
288            .path
289            .iter()
290            .position(|b| *b == 0)
291            .unwrap_or(self.0.path.len());
292
293        // we need to cast from c_char* to u8*
294        let path = unsafe {
295            std::slice::from_raw_parts(self.0.path.as_ptr() as *const u8, end)
296        };
297
298        std::str::from_utf8(path).map(Path::new).ok()
299    }
300}
301
302impl Debug for UnitInfo {
303    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
304        let path = self.path().unwrap_or(Path::new(""));
305
306        f.debug_struct("UnitInfo")
307            .field("start", &self.0.start)
308            .field("length", &self.0.length)
309            .field("flags", &self.0.flags)
310            .field("space", &self.0.space)
311            .field("path", &path)
312            .finish()
313    }
314}
315
316#[derive(Error, Debug, Copy, Clone, PartialEq)]
317#[error("Invalid Path")]
318pub struct InvalidPath;
319
320#[derive(Error, Debug)]
321pub enum EnumerationError {
322    /// A user-provided error.
323    #[error("An error was encountered while iterating")]
324    User(#[source] Box<dyn Error + 'static>),
325    #[error("The prefix was invalid")]
326    InvalidPrefix(#[source] InvalidPath),
327    #[error("CHMLib returned an error")]
328    Internal,
329}
330
331#[derive(Error, Debug, Copy, Clone, PartialEq)]
332#[error("The read failed")]
333pub struct ReadError;
334
335/// The error returned when we are unable to open a [`ChmFile`].
336#[derive(Error, Debug, Copy, Clone, PartialEq)]
337pub enum OpenError {
338    #[error("Invalid path")]
339    InvalidPath(#[from] InvalidPath),
340    #[error("Unable to open the ChmFile")]
341    Other,
342}
343
344#[cfg(unix)]
345fn path_to_cstring(path: &Path) -> Result<CString, InvalidPath> {
346    use std::os::unix::ffi::OsStrExt;
347    let bytes = path.as_os_str().as_bytes();
348    CString::new(bytes).map_err(|_| InvalidPath)
349}
350
/// Convert `path` into a NUL-terminated C string.
///
/// Fails with [`InvalidPath`] when the path isn't valid UTF-8 or contains an
/// interior NUL byte.
#[cfg(not(unix))]
fn path_to_cstring(path: &Path) -> Result<CString, InvalidPath> {
    // Unfortunately, on Windows CHMLib uses CreateFileA() which means all
    // paths will need to be ascii. This can get quite messy, so let's just
    // cross our fingers and hope for the best?
    //
    // Note: `OsStr` has no `as_str()`; the fallible conversion to `&str` is
    // `to_str()`, which returns `None` for non-UTF-8 paths.
    let rust_str = path.as_os_str().to_str().ok_or(InvalidPath)?;
    CString::new(rust_str).map_err(|_| InvalidPath)
}
359
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Location of the sample archive, which sits next to the crate's
    /// directory.
    fn sample_path() -> PathBuf {
        let sample = Path::new(env!("CARGO_MANIFEST_DIR"))
            .parent()
            .unwrap()
            .join("topics.classic.chm");
        assert!(sample.exists());

        sample
    }

    /// Open the sample archive, panicking if that isn't possible.
    fn open_sample() -> ChmFile {
        ChmFile::open(sample_path()).unwrap()
    }

    #[test]
    fn open_valid_chm_file() {
        // opening must succeed...
        let chm_file = open_sample();
        // ... and so must closing it straight away
        drop(chm_file);
    }

    #[test]
    fn find_an_item_in_the_sample() {
        let mut chm = open_sample();

        assert!(chm.find("/BrowserView.html").is_some());
        assert!(chm.find("doesn't exist.txt").is_none());
    }

    #[test]
    fn iterate_over_items() {
        let mut chm = open_sample();

        let mut normal = 0;
        let mut special = 0;
        let mut meta = 0;
        let mut files = 0;
        let mut dirs = 0;

        chm.for_each(Filter::all(), |_chm, unit| {
            let flags = unit.flags();

            if flags.contains(Filter::NORMAL) {
                normal += 1;
            }
            if flags.contains(Filter::SPECIAL) {
                special += 1;
            }
            if flags.contains(Filter::META) {
                meta += 1;
            }
            if flags.contains(Filter::FILES) {
                files += 1;
            }
            if flags.contains(Filter::DIRS) {
                dirs += 1;
            }

            Continuation::Continue
        })
        .unwrap();

        assert_eq!(normal, 199);
        assert_eq!(special, 18);
        assert_eq!(meta, 7);
        assert_eq!(files, 179);
        assert_eq!(dirs, 45);
    }

    #[test]
    fn read_an_item() {
        let mut chm = open_sample();

        // look for a known file
        let item = chm
            .find("/template/packages/core-web/css/index.responsive.css")
            .unwrap();
        let expected_len = item.length() as usize;

        // then read it into a buffer
        let mut buffer = vec![0; expected_len];
        let bytes_written = chm.read(&item, 0, &mut buffer).unwrap();

        // we should have read everything
        assert_eq!(bytes_written, expected_len);

        // ... and got what we expected
        let got = String::from_utf8(buffer).unwrap();
        assert!(got.starts_with(
            "html, body, div#i-index-container, div#i-index-body"
        ));
    }

    #[test]
    fn continuation_with_unit() {
        let mut chm = open_sample();

        chm.for_each(Filter::all(), |_, _| {}).unwrap();
    }

    #[test]
    fn continuation_with_result() {
        let mut chm = open_sample();

        let got_err = chm
            .for_each(Filter::all(), |_, _| Err(InvalidPath))
            .unwrap_err();

        // the callback's error must come back out untouched
        let user_error = match got_err {
            EnumerationError::User(err) => err,
            other => panic!("Unexpected error: {}", other),
        };
        assert!(user_error.downcast_ref::<InvalidPath>().is_some());
    }

    #[test]
    #[should_panic(expected = "Oops...")]
    fn panics_are_propagated() {
        let mut chm = open_sample();

        chm.for_each(Filter::all(), |_, _| panic!("Oops..."))
            .unwrap();
    }
}