nohuman/
lib.rs

1pub mod compression;
2pub mod download;
3
4use log::{debug, info};
5use std::ffi::OsStr;
6use std::io::{self};
7use std::num::ParseIntError;
8use std::path::{Path, PathBuf};
9use std::process::Command;
10
/// Handle for an external program that is looked up and launched by name.
///
/// The stored name is handed unchanged to [`std::process::Command::new`] when the
/// program is run.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandRunner {
    /// Program name (resolved by the OS at launch time) or a path to the executable.
    pub command: String,
}
14
15impl CommandRunner {
16    pub fn new(command: &str) -> Self {
17        Self {
18            command: command.to_string(),
19        }
20    }
21
22    pub fn run(&self, args: &[&str]) -> io::Result<()> {
23        let output = Command::new(&self.command).args(args).output()?;
24
25        let stderr_log = String::from_utf8_lossy(&output.stderr);
26        if !output.status.success() {
27            return Err(io::Error::other(format!(
28                "{} failed with stderr {}",
29                self.command, stderr_log
30            )));
31        }
32
33        debug!("kraken2 stderr:\n {}", stderr_log);
34
35        let (total, classified, unclassified) =
36            parse_kraken_stderr(&stderr_log).unwrap_or((0, 0, 0));
37
38        info!(
39            "{} / {} ({:.2}%) sequences classified as human; {} ({:.2}%) as non-human",
40            classified,
41            total,
42            (classified as f64 / total as f64) * 100.0,
43            unclassified,
44            (unclassified as f64 / total as f64) * 100.0
45        );
46
47        Ok(())
48    }
49
50    pub fn is_executable(&self) -> bool {
51        let cmd = format!("command -v {}", &self.command);
52        let result = Command::new("sh").args(["-c", &cmd]).output();
53        match result {
54            Ok(output) => output.status.success(),
55            Err(_) => false,
56        }
57    }
58}
59
/// Extracts the total, classified and unclassified sequence counts from kraken2's stderr.
///
/// Every line is inspected in order. A line containing `processed` sets the total, one
/// containing `sequences classified` sets the classified count, and one containing
/// `sequences unclassified` sets the unclassified count. The count is the first
/// whitespace-separated token of the line with any commas removed. Counts that never
/// appear stay at zero.
///
/// # Errors
///
/// Returns the [`ParseIntError`] of the first matching line whose leading token is not a
/// valid `usize`.
fn parse_kraken_stderr(stderr: &str) -> Result<(usize, usize, usize), ParseIntError> {
    // Reads the first token of `line` as a count, tolerating thousands separators.
    fn leading_count(line: &str) -> Result<usize, ParseIntError> {
        let token = line.split_whitespace().next().unwrap_or("0");
        token.replace(',', "").parse()
    }

    let (mut total, mut classified, mut unclassified) = (0usize, 0usize, 0usize);

    for line in stderr.lines() {
        // Pick which counter this line updates; unrelated lines are skipped.
        let slot = if line.contains("processed") {
            &mut total
        } else if line.contains("sequences classified") {
            &mut classified
        } else if line.contains("sequences unclassified") {
            &mut unclassified
        } else {
            continue;
        };
        *slot = leading_count(line)?;
    }

    Ok((total, classified, unclassified))
}
98
/// Converts `s` into a [`PathBuf`], failing if nothing exists at that location.
///
/// Intended as a CLI value parser so that a missing path is reported up front.
///
/// # Errors
///
/// Returns a message naming the path when it does not exist.
pub fn check_path_exists<S: AsRef<OsStr> + ?Sized>(s: &S) -> Result<PathBuf, String> {
    let candidate = PathBuf::from(s);
    if !candidate.exists() {
        return Err(format!("{candidate:?} does not exist"));
    }
    Ok(candidate)
}
108
/// Locates a kraken2 database at `path` or in its `db` subdirectory.
///
/// A location qualifies when it is a directory containing `hash.k2d`, `opts.k2d` and
/// `taxo.k2d`. `path` itself is tried first, then `path/db`.
///
/// # Arguments
///
/// * `path` - Directory expected to hold the kraken2 database files.
///
/// # Returns
///
/// * `Result<PathBuf, String>` - Ok with the first qualifying directory, otherwise Err
///   with a message listing the required files and the path that was searched.
pub fn validate_db_directory(path: &Path) -> Result<PathBuf, String> {
    const REQUIRED_FILES: [&str; 3] = ["hash.k2d", "opts.k2d", "taxo.k2d"];

    // True when `dir` is a directory holding every required database file.
    fn holds_database(dir: &Path) -> bool {
        dir.is_dir() && REQUIRED_FILES.iter().all(|name| dir.join(name).exists())
    }

    // Search order matters: the given path wins over its 'db' subdirectory.
    let candidates = vec![path.to_path_buf(), path.join("db")];
    match candidates.into_iter().find(|dir| holds_database(dir)) {
        Some(found) => Ok(found),
        None => {
            let files_str = REQUIRED_FILES.join(", ");
            Err(format!(
                "Required files ({files_str}) not found in {path:?} or its 'db' subdirectory",
            ))
        }
    }
}
142
/// Reads a kraken2 confidence score given on the command line.
///
/// The value must parse as a number and satisfy 0 <= confidence <= 1 (both ends
/// included).
///
/// # Errors
///
/// Returns a message when `s` is not a number or lies outside the closed interval [0, 1].
pub fn parse_confidence_score(s: &str) -> Result<f32, String> {
    match s.parse::<f32>() {
        Err(_) => Err(String::from("Confidence score must be a number")),
        Ok(score) if (0.0..=1.0).contains(&score) => Ok(score),
        Ok(_) => Err(String::from(
            "Confidence score must be in the closed interval [0, 1]",
        )),
    }
}
152
/// Unit tests for the command runner and the CLI helper functions.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_new() {
        let command = CommandRunner::new("ls");
        assert_eq!(command.command, "ls");
    }

    #[test]
    fn test_run() {
        let command = CommandRunner::new("ls");
        let result = command.run(&["-l"]);
        assert!(result.is_ok());
    }

    #[test]
    fn test_run_with_invalid_command() {
        let command = CommandRunner::new("not-a-real-command");
        let result = command.run(&["-l"]);
        assert!(result.is_err());
    }

    #[test]
    fn test_is_executable() {
        let command = CommandRunner::new("ls");
        assert!(command.is_executable());
    }

    #[test]
    fn test_is_not_executable() {
        let command = CommandRunner::new("not-a-real-command");
        assert!(!command.is_executable());
    }

    #[test]
    fn check_path_exists_it_doesnt() {
        let result = check_path_exists(OsStr::new("fake.path"));
        assert!(result.is_err())
    }

    #[test]
    fn check_path_it_does() {
        // Relies on the tests running from the crate root, where Cargo.toml lives.
        let actual = check_path_exists(OsStr::new("Cargo.toml")).unwrap();
        let expected = PathBuf::from("Cargo.toml");
        assert_eq!(actual, expected)
    }

    #[test]
    fn test_validate_db_directory_missing() {
        let result = validate_db_directory(Path::new("fake.path"));
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_confidence_score() {
        let result = parse_confidence_score("0.5");
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 0.5);

        let result = parse_confidence_score("1.0");
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 1.0);

        let result = parse_confidence_score("0.0");
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 0.0);

        let result = parse_confidence_score("1.1");
        assert!(result.is_err());

        let result = parse_confidence_score("-0.1");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_confidence_score_not_a_number() {
        assert!(parse_confidence_score("abc").is_err());
        assert!(parse_confidence_score("").is_err());
    }

    #[test]
    fn test_parse_kraken_stderr() {
        // Shape of the summary kraken2 prints on stderr; commas are accepted as
        // thousands separators by the parser.
        let stderr = "Loading database information... done.\n\
1,000 sequences (0.15 Mbp) processed in 0.050s (1200.0 Kseq/m, 180.00 Mbp/m).\n\
  900 sequences classified (90.00%)\n\
  100 sequences unclassified (10.00%)\n";
        assert_eq!(parse_kraken_stderr(stderr), Ok((1000, 900, 100)));
    }

    #[test]
    fn test_parse_kraken_stderr_without_summary() {
        assert_eq!(parse_kraken_stderr(""), Ok((0, 0, 0)));
        assert_eq!(parse_kraken_stderr("total 0\n"), Ok((0, 0, 0)));
    }

    #[test]
    fn test_parse_kraken_stderr_invalid_count() {
        assert!(parse_kraken_stderr("reads processed").is_err());
    }
}