nanalogue 0.1.11

BAM/Mod BAM parsing and analysis tool with a single-molecule focus
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
//! `PathOrURLOrStdin` enum for handling input sources.
//!
//! Represents stdin, file paths, or URLs as input sources.

use crate::{Error, InputBam, InputBamBuilder};
use serde::{Deserialize, Serialize};
use std::fmt;
use std::path::PathBuf;
use std::str::FromStr;
use url::Url;

/// Represents different input sources: stdin, file path, or URL.
///
/// This enum allows flexible input handling where data can come from:
/// - Standard input (represented by "-")
/// - A local file path (existence is not validated during parsing)
/// - A remote URL (only http, https, and ftp schemes are recognized)
///
/// Deserialization is routed through the private `PathOrURLOrStdinShadow`
/// type (see the `serde(try_from = ...)` attribute on this enum), so a
/// deserialized value must pass that type's `TryFrom` validation (stdin-marker
/// and URL-scheme checks) before a value of this type is produced.
///
/// # Examples
///
/// ```
/// use nanalogue_core::PathOrURLOrStdin;
/// use std::str::FromStr;
///
/// // Parse from stdin marker
/// let stdin = PathOrURLOrStdin::from_str("-")?;
/// assert!(matches!(stdin, PathOrURLOrStdin::Stdin));
///
/// // Parse from URL
/// let url = PathOrURLOrStdin::from_str("https://example.com/data.bam")?;
/// assert!(matches!(url, PathOrURLOrStdin::URL(_)));
///
/// // Parse from file path (existence not checked during parsing)
/// let path = PathOrURLOrStdin::from_str("examples/example_1.bam")?;
/// assert!(matches!(path, PathOrURLOrStdin::Path(_)));
///
/// // Non-existent paths are also accepted (I/O errors occur at use-time)
/// let path = PathOrURLOrStdin::from_str("/nonexistent/file.txt")?;
/// assert!(matches!(path, PathOrURLOrStdin::Path(_)));
///
/// # Ok::<(), nanalogue_core::Error>(())
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[non_exhaustive]
#[serde(try_from = "PathOrURLOrStdinShadow")]
pub enum PathOrURLOrStdin {
    /// Standard input; parsed from and displayed as `-`. This is the
    /// `Default` value.
    #[default]
    Stdin,
    /// A local file path. Parsing does not check that the path exists.
    Path(PathBuf),
    /// A URL. String parsing and deserialization only produce this variant
    /// for the `http`, `https`, and `ftp` schemes.
    URL(Url),
}

/// Schemes accepted by the URL variant. This single list is consulted by
/// both `FromStr::from_str` and the serde `TryFrom` validation, so JSON
/// deserialization cannot produce a `URL(_)` variant containing a
/// non-allow-listed scheme such as `file://` (which `hts_open` would happily
/// dereference as a local file).
const ALLOWED_NETWORK_SCHEMES: &[&str] = &["http", "https", "ftp"];

/// Shadow type used solely by serde to validate `PathOrURLOrStdin`
/// deserialization. Routes the raw payload through the same allow-list /
/// stdin-marker checks as [`PathOrURLOrStdin::from_str`].
///
/// The variants mirror those of `PathOrURLOrStdin` by name. This type only
/// carries the raw deserialized payload; the checks themselves are performed
/// by the `TryFrom<PathOrURLOrStdinShadow>` impl on `PathOrURLOrStdin`.
#[derive(Deserialize)]
#[expect(
    clippy::upper_case_acronyms,
    reason = "Shadow enum must preserve the public URL variant name for serde compatibility"
)]
enum PathOrURLOrStdinShadow {
    /// Standard input
    Stdin,
    /// A local file path (not yet checked against the `-` stdin marker)
    Path(PathBuf),
    /// A URL (scheme not yet checked against the allow-list)
    URL(Url),
}

impl TryFrom<PathOrURLOrStdinShadow> for PathOrURLOrStdin {
    type Error = Error;

    fn try_from(value: PathOrURLOrStdinShadow) -> Result<Self, Self::Error> {
        match value {
            PathOrURLOrStdinShadow::Stdin => Ok(PathOrURLOrStdin::Stdin),
            PathOrURLOrStdinShadow::Path(p) => {
                // Disallow the literal stdin marker inside Path; FromStr would
                // route "-" to `Stdin`, so accepting it here would create an
                // unreachable-via-FromStr variant state.
                if p.as_os_str() == "-" {
                    Err(Error::InvalidState(
                        "`-` is reserved for Stdin and is not a valid Path variant".to_owned(),
                    ))
                } else {
                    Ok(PathOrURLOrStdin::Path(p))
                }
            }
            PathOrURLOrStdinShadow::URL(u) => {
                if ALLOWED_NETWORK_SCHEMES.contains(&u.scheme()) {
                    Ok(PathOrURLOrStdin::URL(u))
                } else {
                    Err(Error::InvalidState(format!(
                        "URL scheme `{}` is not in the allow-list ({}); use a Path variant instead",
                        u.scheme(),
                        ALLOWED_NETWORK_SCHEMES.join(", ")
                    )))
                }
            }
        }
    }
}

impl FromStr for PathOrURLOrStdin {
    type Err = Error;

    /// Classifies a string as stdin, a network URL, or a filesystem path.
    ///
    /// The rules are applied in this order:
    /// 1. The exact string "-" becomes `Stdin`.
    /// 2. A string that parses as a URL with an allowed network scheme
    ///    (http, https, ftp) becomes `URL(parsed_url)`.
    /// 3. Anything else becomes `Path(..)`, including URLs whose scheme is
    ///    not allow-listed (for example `file://`).
    ///
    /// **Note**: Only the text is inspected; the filesystem is never
    /// consulted, so a path that does not exist is still accepted. I/O errors
    /// appear when the path is actually used, which avoids TOCTOU
    /// (time-of-check to time-of-use) races.
    ///
    /// # Examples
    ///
    /// ```
    /// use nanalogue_core::PathOrURLOrStdin;
    /// use std::str::FromStr;
    ///
    /// // Stdin
    /// let input = PathOrURLOrStdin::from_str("-")?;
    /// assert!(matches!(input, PathOrURLOrStdin::Stdin));
    ///
    /// // URL
    /// let input = PathOrURLOrStdin::from_str("https://example.com/file.txt")?;
    /// assert!(matches!(input, PathOrURLOrStdin::URL(_)));
    ///
    /// // Path (even if it doesn't exist yet)
    /// let input = PathOrURLOrStdin::from_str("/path/to/file.txt")?;
    /// assert!(matches!(input, PathOrURLOrStdin::Path(_)));
    ///
    /// # Ok::<(), nanalogue_core::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// The current implementation never returns `Err`: every input string
    /// falls into one of the three variants above.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let source = match s {
            // The stdin marker takes priority over every other interpretation.
            "-" => Self::Stdin,
            other => match Url::parse(other) {
                // Restricting to known network schemes keeps local paths from
                // being misclassified as URLs.
                Ok(url) if ALLOWED_NETWORK_SCHEMES.contains(&url.scheme()) => Self::URL(url),
                // Unsupported scheme or not a URL at all: keep it as a path,
                // without checking existence (avoids TOCTOU).
                Ok(_) | Err(_) => Self::Path(PathBuf::from(other)),
            },
        };
        Ok(source)
    }
}

impl fmt::Display for PathOrURLOrStdin {
    /// Writes "-" for stdin, otherwise the text of the wrapped path or URL.
    /// Paths are converted lossily, so a path containing non-UTF-8 data may
    /// not be shown faithfully.
    #[expect(
        clippy::pattern_type_mismatch,
        reason = "&self/self/etc. does not make a difference to readability here"
    )]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered: String = match self {
            PathOrURLOrStdin::Stdin => "-".to_owned(),
            PathOrURLOrStdin::Path(v) => v.to_string_lossy().into_owned(),
            PathOrURLOrStdin::URL(v) => v.as_str().to_owned(),
        };
        // Hand the text to the string `Display` impl so formatter options
        // (width, alignment, ...) are applied to the rendered value.
        fmt::Display::fmt(rendered.as_str(), f)
    }
}

impl From<PathOrURLOrStdin> for InputBam {
    /// Builds an `InputBam` whose BAM source is the given input.
    ///
    /// Only `bam_path` is set on the builder; every other field is left to
    /// the builder's defaults.
    ///
    /// # Panics
    ///
    /// Panics if the builder returns an error, which is not expected when
    /// `bam_path` is the only field supplied.
    fn from(val: PathOrURLOrStdin) -> Self {
        let built = InputBamBuilder::default().bam_path(val).build();
        built.expect("InputBam builder should not fail with only bam_path set")
    }
}

#[cfg(test)]
#[expect(
    clippy::panic,
    reason = "panic is acceptable in tests for assertion failures"
)]
mod tests {
    use super::*;
    use crate::uuid;
    use std::fs::File;
    use std::io::Write as _;

    /// The literal "-" parses to the `Stdin` variant.
    #[test]
    fn from_str_parses_stdin() {
        let result = PathOrURLOrStdin::from_str("-").expect("should parse stdin");
        assert!(matches!(result, PathOrURLOrStdin::Stdin));
    }

    /// An https URL parses to `URL` with scheme, host, and path intact.
    #[test]
    fn from_str_parses_url() {
        let result =
            PathOrURLOrStdin::from_str("https://example.com/file.bam").expect("should parse URL");
        match result {
            PathOrURLOrStdin::URL(u) => {
                assert_eq!(u.scheme(), "https");
                assert_eq!(u.host_str(), Some("example.com"));
                assert_eq!(u.path(), "/file.bam");
            }
            PathOrURLOrStdin::Stdin | PathOrURLOrStdin::Path(_) => {
                panic!("Expected URL variant")
            }
        }
    }

    /// A plain http URL is also recognized as `URL`.
    #[test]
    fn from_str_parses_http_url() {
        let result =
            PathOrURLOrStdin::from_str("http://example.com/data").expect("should parse URL");
        match result {
            PathOrURLOrStdin::URL(u) => {
                assert_eq!(u.scheme(), "http");
            }
            PathOrURLOrStdin::Stdin | PathOrURLOrStdin::Path(_) => {
                panic!("Expected URL variant")
            }
        }
    }

    /// A path to a file that exists on disk parses to `Path` unchanged.
    #[test]
    fn from_str_parses_existing_path() {
        // Create a temporary file with a random UUID name in platform temp directory
        let temp_dir = std::env::temp_dir();
        let temp_filename = temp_dir.join(format!("nanalogue_test_{}.txt", uuid::v4_random()));
        // Resolve the string form before touching the filesystem so a
        // non-UTF-8 temp directory fails the test without leaving a file behind.
        let temp_path_str = temp_filename
            .to_str()
            .expect("temp path should be valid UTF-8")
            .to_owned();
        {
            let mut file = File::create(&temp_filename).expect("should create temp file");
            file.write_all(b"test content")
                .expect("should write to file");
        }

        let result = PathOrURLOrStdin::from_str(&temp_path_str);

        // Clean up before asserting, so a failed expectation below cannot
        // leak the temporary file.
        std::fs::remove_file(&temp_filename).expect("should remove temp file");

        match result.expect("should parse path") {
            PathOrURLOrStdin::Path(p) => {
                assert_eq!(p, temp_filename);
            }
            PathOrURLOrStdin::Stdin | PathOrURLOrStdin::URL(_) => {
                panic!("Expected Path variant")
            }
        }
    }

    /// Parsing does not check existence, so a missing path is still a `Path`.
    #[test]
    fn from_str_accepts_nonexistent_path() {
        // Use a random UUID to ensure the path doesn't exist
        let nonexistent_path = format!("/nonexistent/path/to/{}.txt", uuid::v4_random());
        let result =
            PathOrURLOrStdin::from_str(&nonexistent_path).expect("should accept nonexistent path");
        assert!(
            matches!(result, PathOrURLOrStdin::Path(_)),
            "Expected Path variant for non-existent path"
        );
    }

    /// Arbitrary text that is neither "-" nor a network URL becomes a `Path`.
    #[test]
    fn from_str_accepts_any_string_as_path() {
        // Any string that's not "-" and not a valid network URL should be treated as a path
        let result =
            PathOrURLOrStdin::from_str("not a url or valid path").expect("should accept as path");
        assert!(
            matches!(result, PathOrURLOrStdin::Path(_)),
            "Expected Path variant for arbitrary string"
        );
    }

    /// `Default` yields the `Stdin` variant.
    #[test]
    fn default_is_stdin() {
        let default_val = PathOrURLOrStdin::default();
        assert!(matches!(default_val, PathOrURLOrStdin::Stdin));
    }

    /// Only allow-listed schemes become `URL`; everything else is a `Path`.
    #[test]
    fn url_scheme_variants() {
        // Test allowed network schemes
        let ftp_result = PathOrURLOrStdin::from_str("ftp://example.com/file.txt");
        assert!(
            matches!(ftp_result, Ok(PathOrURLOrStdin::URL(_))),
            "ftp:// should be recognized as URL"
        );

        // Test that non-network schemes like file:// are treated as paths
        let file_result = PathOrURLOrStdin::from_str("file:///path/to/file");
        assert!(
            matches!(file_result, Ok(PathOrURLOrStdin::Path(_))),
            "file:// scheme should be treated as Path, not URL"
        );

        // Test other unsupported schemes are also treated as paths
        let data_result = PathOrURLOrStdin::from_str("data:text/plain,hello");
        assert!(
            matches!(data_result, Ok(PathOrURLOrStdin::Path(_))),
            "data: scheme should be treated as Path"
        );

        // Windows-like paths should also be treated as paths
        let windows_path = PathOrURLOrStdin::from_str("C:/path/to/file.txt");
        assert!(
            matches!(windows_path, Ok(PathOrURLOrStdin::Path(_))),
            "Windows-like paths should be treated as Path"
        );
    }

    /// `Stdin` displays as "-".
    #[test]
    fn display_stdin() {
        let stdin = PathOrURLOrStdin::Stdin;
        assert_eq!(stdin.to_string(), "-");
    }

    /// A `Path` displays as the path text.
    #[test]
    fn display_path() {
        let path = PathOrURLOrStdin::Path("/some/path/to/file.bam".into());
        assert_eq!(path.to_string(), "/some/path/to/file.bam");
    }

    /// A `URL` displays as the URL text.
    #[test]
    fn display_url() {
        let url = PathOrURLOrStdin::URL(Url::parse("https://example.com/data.bam").unwrap());
        assert_eq!(url.to_string(), "https://example.com/data.bam");
    }

    /// Converting `Stdin` into `InputBam` keeps it as the BAM path.
    #[test]
    fn from_path_or_url_or_stdin_to_input_bam_stdin() {
        let input = PathOrURLOrStdin::Stdin;
        let bam: InputBam = input.into();
        assert_eq!(bam.bam_path, PathOrURLOrStdin::Stdin);
    }

    /// Converting a `Path` into `InputBam` keeps it as the BAM path.
    #[test]
    fn from_path_or_url_or_stdin_to_input_bam_path() {
        let input = PathOrURLOrStdin::Path("/some/path.bam".into());
        let bam: InputBam = input.clone().into();
        assert_eq!(bam.bam_path, input);
    }

    /// Converting a `URL` into `InputBam` keeps it as the BAM path.
    #[test]
    fn from_path_or_url_or_stdin_to_input_bam_url() {
        let url = Url::parse("https://example.com/data.bam").unwrap();
        let input = PathOrURLOrStdin::URL(url.clone());
        let bam: InputBam = input.clone().into();
        assert_eq!(bam.bam_path, input);
    }

    /// JSON deserialization must enforce the same URL-scheme allow-list as
    /// `FromStr`.
    #[test]
    fn deserialize_rejects_file_url_scheme() {
        let bad: Result<PathOrURLOrStdin, _> =
            serde_json::from_str(r#"{"URL":"file:///etc/passwd"}"#);
        let _: serde_json::Error = bad.unwrap_err();
    }

    /// JSON deserialization must also reject other non-allow-listed schemes.
    #[test]
    fn deserialize_rejects_other_disallowed_schemes() {
        for payload in [
            r#"{"URL":"ssh://host/path"}"#,
            r#"{"URL":"data:text/plain,hello"}"#,
        ] {
            let bad: Result<PathOrURLOrStdin, _> = serde_json::from_str(payload);
            assert!(
                bad.is_err(),
                "expected deserialize to reject payload `{payload}`",
            );
        }
    }

    /// Deserialization must reject `"-"` inside the `Path` variant — that
    /// value is the stdin marker and is unreachable via the documented
    /// `FromStr` parser.
    #[test]
    fn deserialize_rejects_dash_inside_path_variant() {
        let bad: Result<PathOrURLOrStdin, _> = serde_json::from_str(r#"{"Path":"-"}"#);
        let _: serde_json::Error = bad.unwrap_err();
    }

    /// Allow-listed schemes still deserialize successfully.
    #[test]
    fn deserialize_accepts_allowed_schemes() {
        let good: PathOrURLOrStdin =
            serde_json::from_str(r#"{"URL":"https://example.com/file.bam"}"#)
                .expect("https should deserialize");
        assert!(matches!(good, PathOrURLOrStdin::URL(_)));
    }
}