Skip to main content

yara_mapper/
indexer.rs

1use std::ffi::CStr;
2use std::path::{Path, PathBuf};
3use std::ptr::NonNull;
4
5use crate::error::YaraError;
6use crate::ffi_helpers::{collect_contig_lengths, collect_contig_names, path_to_cstring};
7
/// Configuration knobs for constructing a YARA FM index.
///
/// The [`Default`] value has no explicit temporary directory and a verbosity
/// of `0`.
#[derive(Clone, Debug, Default)]
pub struct IndexerOptions {
    /// Directory that receives intermediate files while the index is built.
    /// When `None`, the parent of the output directory is used.
    pub tmp_dir: Option<PathBuf>,
    /// How chatty the indexer is: `0` = silent, `1` = progress, `2` = debug.
    pub verbose: u32,
}
17
18/// Handle to a built YARA FM index.
19///
20/// After calling [`YaraIndexer::build`], the on-disk index files have been
21/// written and the handle retains contig metadata for querying.
22///
23/// # Thread safety
24///
25/// [`YaraIndexer`] is [`Send`] but not [`Sync`], consistent with
26/// [`crate::YaraMapper`].  Note that [`YaraIndexer::build`] modifies
27/// the `TMPDIR` environment variable (restored via RAII), so concurrent
28/// `build` calls from multiple threads are not safe.
29pub struct YaraIndexer {
30    handle: NonNull<yara_mapper_sys::YaraIndexerHandle>,
31}
32
33// SAFETY: The C++ handle owns all its memory and can be moved between threads.
34// Post-build accessors are read-only, but we keep !Sync for consistency with
35// YaraMapper and because build() modifies global state (TMPDIR).
36unsafe impl Send for YaraIndexer {}
37
38impl YaraIndexer {
39    /// Build a YARA FM index from a FASTA file.
40    ///
41    /// `fasta_path` is the path to the input FASTA/FASTQ reference file.
42    /// `output_prefix` is the file prefix for index output (e.g., `ref/genome`
43    /// produces `ref/genome.txt`, `ref/genome.rid`, `ref/genome.txt.size`,
44    /// `ref/genome.sa`, `ref/genome.lf`).
45    ///
46    /// # Errors
47    ///
48    /// Returns [`YaraError::IndexBuild`] if the FASTA file cannot be opened,
49    /// if the paths contain non-UTF-8 characters or null bytes, or if the
50    /// underlying C++ indexer fails (e.g., insufficient memory or disk space).
51    pub fn build<P: AsRef<Path>>(
52        fasta_path: P,
53        output_prefix: P,
54        options: &IndexerOptions,
55    ) -> Result<Self, YaraError> {
56        let fasta_cstr = path_to_cstring(fasta_path.as_ref(), "fasta_path", YaraError::IndexBuild)?;
57        let prefix_cstr =
58            path_to_cstring(output_prefix.as_ref(), "output_prefix", YaraError::IndexBuild)?;
59        let tmp_dir_cstr = options
60            .tmp_dir
61            .as_ref()
62            .map(|p| path_to_cstring(p, "tmp_dir", YaraError::IndexBuild))
63            .transpose()?;
64
65        let ffi_opts = yara_mapper_sys::YaraIndexerOptions {
66            output_prefix: prefix_cstr.as_ptr(),
67            tmp_dir: tmp_dir_cstr.as_ref().map_or(std::ptr::null(), |c| c.as_ptr()),
68            #[expect(clippy::cast_possible_wrap, reason = "FFI boundary: verbose fits in i32")]
69            verbose: options.verbose as std::os::raw::c_int,
70        };
71
72        let mut error_buf = vec![0u8; 1024];
73
74        let handle = unsafe {
75            yara_mapper_sys::yara_indexer_build(
76                fasta_cstr.as_ptr(),
77                &ffi_opts,
78                error_buf.as_mut_ptr().cast(),
79                error_buf.len(),
80            )
81        };
82
83        NonNull::new(handle).map(|h| Self { handle: h }).ok_or_else(|| {
84            let msg = unsafe { CStr::from_ptr(error_buf.as_ptr().cast()) };
85            YaraError::IndexBuild(msg.to_string_lossy().into_owned())
86        })
87    }
88
89    /// Number of contigs in the built index.
90    #[must_use]
91    pub fn contig_count(&self) -> usize {
92        unsafe { yara_mapper_sys::yara_indexer_contig_count(self.handle.as_ptr()) }
93    }
94
95    /// Contig names from the indexed reference.
96    #[must_use]
97    pub fn contig_names(&self) -> Vec<String> {
98        let n = self.contig_count();
99        unsafe {
100            collect_contig_names(n, |i| {
101                yara_mapper_sys::yara_indexer_contig_name(self.handle.as_ptr(), i)
102            })
103        }
104    }
105
106    /// Contig lengths from the indexed reference.
107    #[must_use]
108    pub fn contig_lengths(&self) -> Vec<usize> {
109        let n = self.contig_count();
110        collect_contig_lengths(n, |i| unsafe {
111            yara_mapper_sys::yara_indexer_contig_length(self.handle.as_ptr(), i)
112        })
113    }
114}
115
116impl Drop for YaraIndexer {
117    fn drop(&mut self) {
118        unsafe { yara_mapper_sys::yara_indexer_close(self.handle.as_ptr()) }
119    }
120}