rustyms/
helper_functions.rs

1#![allow(dead_code)]
2
3use std::{
4    collections::HashMap,
5    hash::Hash,
6    num::{IntErrorKind, ParseIntError},
7    ops::{Bound, Range, RangeBounds},
8    path::Path,
9    str::FromStr,
10};
11
12use crate::sequence::SequencePosition;
13
14pub(crate) fn peptide_range_contains(
15    range: &impl RangeBounds<usize>,
16    peptide_length: usize,
17    position: SequencePosition,
18) -> bool {
19    match position {
20        SequencePosition::NTerm => range.start_index() == 0,
21        SequencePosition::Index(i) => range.contains(&i),
22        SequencePosition::CTerm => range.end_index(peptide_length) == peptide_length,
23    }
24}
25
/// Flatten a `Result` that has another `Result` nested in either its `Ok` or its `Err` variant.
pub(crate) trait ResultExtensions<T, E> {
    /// Collapse the nested result into a single `Result<T, E>`.
    /// # Errors
    /// If the innermost result that is reached is an `Err` that error is returned.
    fn flat_err(self) -> Result<T, E>;
}

/// The result is nested in the error: an outer `Ok` is kept as is, an outer `Err` is replaced by
/// the result it contains (which can itself still be an `Ok`).
impl<T, E> ResultExtensions<T, E> for Result<T, Result<T, E>> {
    fn flat_err(self) -> Result<T, E> {
        self.or_else(|nested| nested)
    }
}

/// The result is nested in the value: an outer `Err` is kept as is, an outer `Ok` is replaced by
/// the result it contains.
impl<T, E> ResultExtensions<T, E> for Result<Result<T, E>, E> {
    fn flat_err(self) -> Result<T, E> {
        self.and_then(|nested| nested)
    }
}
49
/// Turn an optional result inside out, so that the error can be handled before the option.
pub(crate) trait InvertResult<T, E> {
    /// Move the `Result` to the outside and the `Option` to the inside.
    /// A missing value (`None`) is not an error and gives `Ok(None)`.
    /// # Errors
    /// If a result is present and that result is an `Err`, that error is returned.
    fn invert(self) -> Result<Option<T>, E>;
}

impl<T, E> InvertResult<T, E> for Option<Result<T, E>> {
    fn invert(self) -> Result<Option<T>, E> {
        self.transpose()
    }
}

/// Both levels of `Option` are merged, a `None` at either level gives `Ok(None)`.
impl<T, E> InvertResult<T, E> for Option<Option<Result<T, E>>> {
    fn invert(self) -> Result<Option<T>, E> {
        self.flatten().transpose()
    }
}

/// The contained result already has the right shape, so only the outer `None` needs handling.
impl<T, E> InvertResult<T, E> for Option<Result<Option<T>, E>> {
    fn invert(self) -> Result<Option<T>, E> {
        match self {
            Some(inner) => inner,
            None => Ok(None),
        }
    }
}
72
/// Get concrete indices out of any kind of range over `usize`.
pub(crate) trait RangeExtension: Sized {
    /// The first index that falls inside the range, 0 if the start is unbounded.
    fn start_index(&self) -> usize;
    /// The last index that falls inside the range (inclusive) or the given upper bound, whichever
    /// is smaller. An unbounded end gives the upper bound.
    fn end_index(&self, upper_bound: usize) -> usize;
    /// The start and end index as a tuple, see [`Self::start_index`] and [`Self::end_index`].
    fn bounds(&self, upper_bound: usize) -> (usize, usize) {
        let first = self.start_index();
        let last = self.end_index(upper_bound);
        (first, last)
    }
}

impl<Ra: RangeBounds<usize>> RangeExtension for Ra {
    fn start_index(&self) -> usize {
        match self.start_bound() {
            Bound::Included(&first) => first,
            Bound::Excluded(&before) => before + 1,
            Bound::Unbounded => 0,
        }
    }

    fn end_index(&self, upper_bound: usize) -> usize {
        let last = match self.end_bound() {
            Bound::Included(&last) => last,
            // An exclusive end of 0 has no index before it, this saturates to index 0
            Bound::Excluded(&after) => after.saturating_sub(1),
            Bound::Unbounded => upper_bound,
        };
        last.min(upper_bound)
    }
}
102
/// Shift the start or the end of a range while keeping the range well formed (`start <= end`).
///
/// All operations saturate at the numeric bounds of the index instead of overflowing.
pub(crate) trait RangeMaths<Other>
where
    Self: Sized,
{
    /// Move the start up by the given amount, the end is moved along if the start would pass it.
    fn add_start(&self, amount: Other) -> Self;
    /// Move the end up by the given amount, the start is moved along if it would lie past the end.
    fn add_end(&self, amount: Other) -> Self;
    /// Move the start down by the given amount, the end is moved along if the start would pass it.
    fn sub_start(&self, amount: Other) -> Self;
    /// Move the end down by the given amount, the start is moved along if it would lie past the end.
    fn sub_end(&self, amount: Other) -> Self;
}

/// Build a copy of the range with a new start, raising the end to the start if needed.
fn range_with_start(range: &Range<usize>, start: usize) -> Range<usize> {
    start..range.end.max(start)
}

/// Build a copy of the range with a new end, lowering the start to the end if needed.
fn range_with_end(range: &Range<usize>, end: usize) -> Range<usize> {
    range.start.min(end)..end
}

/// Subtract a signed amount from an index, saturating at the bounds of `usize`.
/// Negating the amount directly would overflow for `isize::MIN`, so the magnitude is used instead.
fn saturating_sub_signed_amount(value: usize, amount: isize) -> usize {
    if amount.is_negative() {
        value.saturating_add(amount.unsigned_abs())
    } else {
        value.saturating_sub(amount.unsigned_abs())
    }
}

impl RangeMaths<isize> for Range<usize> {
    fn add_start(&self, amount: isize) -> Self {
        range_with_start(self, self.start.saturating_add_signed(amount))
    }
    fn add_end(&self, amount: isize) -> Self {
        range_with_end(self, self.end.saturating_add_signed(amount))
    }
    fn sub_start(&self, amount: isize) -> Self {
        range_with_start(self, saturating_sub_signed_amount(self.start, amount))
    }
    fn sub_end(&self, amount: isize) -> Self {
        range_with_end(self, saturating_sub_signed_amount(self.end, amount))
    }
}

impl RangeMaths<usize> for Range<usize> {
    fn add_start(&self, amount: usize) -> Self {
        range_with_start(self, self.start.saturating_add(amount))
    }
    fn add_end(&self, amount: usize) -> Self {
        range_with_end(self, self.end.saturating_add(amount))
    }
    fn sub_start(&self, amount: usize) -> Self {
        // This has to subtract, it previously added the amount just like `add_start`
        range_with_start(self, self.start.saturating_sub(amount))
    }
    fn sub_end(&self, amount: usize) -> Self {
        range_with_end(self, self.end.saturating_sub(amount))
    }
}
174
/// Parse a text consisting of names that are each optionally followed by a count.
///
/// Spaces between the entries are skipped. The names are tried in the order given and the first
/// name the remaining text starts with is used. ASCII whitespace is allowed between a name and
/// its count. A name without a count is counted as 1. A `-` is only seen as part of the count if
/// `allow_negative` is set.
/// # Errors
/// If the name cannot be recognised or a number is not valid.
pub(crate) fn parse_named_counter<T: Clone>(
    value: &str,
    names: &[(String, T)],
    allow_negative: bool,
) -> Result<Vec<(T, isize)>, String> {
    let mut cursor = 0;
    let mut counts = Vec::new();
    while cursor < value.len() {
        let rest = &value[cursor..];
        if rest.starts_with(' ') {
            cursor += 1;
            continue;
        }
        let Some((name, item)) = names
            .iter()
            .find(|(name, _)| rest.starts_with(name.as_str()))
        else {
            return Err(format!("Name not recognised {}", rest));
        };
        cursor += name.len();
        let after_name = &value[cursor..];
        let num: String = after_name
            .chars()
            .skip_while(char::is_ascii_whitespace)
            .take_while(|c| c.is_ascii_digit() || (allow_negative && *c == '-'))
            .collect();
        let count = if num.is_empty() {
            // Without a number the whitespace is left in place for the main loop to handle
            1
        } else {
            let parsed: isize = num
                .parse()
                .map_err(|_| format!("Not a valid number '{num}'"))?;
            // Step over the whitespace before the number as well as the number itself
            let gap = after_name
                .chars()
                .take_while(char::is_ascii_whitespace)
                .count();
            cursor += num.len() + gap;
            parsed
        };
        counts.push((item.clone(), count));
    }
    Ok(counts)
}
224
/// Split a string into chunks of text separated by ASCII whitespace.
///
/// Every chunk is returned together with the byte offset of its first character in `input`, for
/// nice error generation. Chunks can contain any non ASCII characters, the offsets are always
/// byte offsets that lie on character boundaries.
pub(crate) fn split_ascii_whitespace(input: &str) -> Vec<(usize, &str)> {
    let mut chunks = Vec::new();
    // The byte offset of the chunk that is currently being read, if any
    let mut chunk_start = None;
    for (index, c) in input.char_indices() {
        match (c.is_ascii_whitespace(), chunk_start) {
            (true, Some(start)) => {
                chunks.push((start, &input[start..index]));
                chunk_start = None;
            }
            (false, None) => chunk_start = Some(index),
            // Either more whitespace between chunks or more text inside a chunk
            _ => (),
        }
    }
    if let Some(start) = chunk_start {
        chunks.push((start, &input[start..]));
    }
    chunks
}
243
/// Check if the filename has the given extension, ignoring ASCII casing.
/// A filename without any extension never matches.
pub(crate) fn check_extension(filename: impl AsRef<Path>, extension: impl AsRef<Path>) -> bool {
    let wanted = extension.as_ref();
    match filename.as_ref().extension() {
        Some(found) => found.eq_ignore_ascii_case(wanted),
        None => false,
    }
}
251
/// Get the index of the first byte equal to `char` at or after `start`.
/// This only looks at the byte values, so it does not guarantee a match on a full character.
/// # Panics
/// If `start` is bigger than the length of `chars`.
pub(crate) fn next_char(chars: &[u8], start: usize, char: u8) -> Option<usize> {
    chars[start..]
        .iter()
        .position(|candidate| *candidate == char)
        .map(|offset| start + offset)
}
261
262/// Find the enclosed text by the given symbols, assumes a single open is already read just before the start, guarantees to only pick full characters
263pub(crate) fn end_of_enclosure(text: &str, start: usize, open: u8, close: u8) -> Option<usize> {
264    let mut state = 1;
265    for (i, ch) in text.as_bytes()[start..].iter().enumerate() {
266        // Check if this byte is a full character (is_char_boundary also works on index==len)
267        if text.is_char_boundary(start + i) && text.is_char_boundary(start + i + 1) {
268            if *ch == open {
269                state += 1;
270            } else if *ch == close {
271                state -= 1;
272                if state == 0 {
273                    return Some(start + i);
274                }
275            }
276        }
277    }
278    None
279}
280
281/// Find the enclosed text by the given symbols, assumes a single open is already read just before the start.
282/// This also takes brackets '[]' into account and these take precedence over the enclosure searched for.
283pub(crate) fn end_of_enclosure_with_brackets(
284    text: &str,
285    start: usize,
286    open: u8,
287    close: u8,
288) -> Option<usize> {
289    let mut state = 1;
290    let mut index = start;
291    while index < text.len() {
292        if !text.is_char_boundary(index) {
293            index += 1;
294            continue;
295        }
296        if index + 1 < text.len() && !text.is_char_boundary(index + 1) {
297            index += 1;
298            continue;
299        }
300        let ch = text.as_bytes()[index];
301        if ch == b'[' {
302            index = end_of_enclosure(text, index + 1, b'[', b']')?;
303        }
304        if ch == open {
305            state += 1;
306        } else if ch == close {
307            state -= 1;
308            if state == 0 {
309                return Some(index);
310            }
311        }
312        index += 1;
313    }
314    None
315}
316
/// Split the given byte range of the text into fields based on the separator.
/// This also takes brackets into account and these take precedence over the separator searched
/// for: a separator between an `open` and `close` symbol does not split the text. Unbalanced
/// `close` symbols are ignored. The returned ranges do not contain the separators, and there is
/// always at least one range returned.
/// # Panics
/// If the end of the range lies beyond the end of the text.
pub(crate) fn split_with_brackets(
    text: &str,
    range: Range<usize>,
    separator: u8,
    open: u8,
    close: u8,
) -> Vec<Range<usize>> {
    // Where the scan ends, for an inverted range nothing is scanned and this is the start
    let stop = range.end.max(range.start);
    let mut depth: usize = 0;
    let mut field_start = range.start;
    let mut fields = Vec::new();
    for index in range {
        // Only look at bytes that form a full character on their own
        if !text.is_char_boundary(index)
            || (index + 1 < text.len() && !text.is_char_boundary(index + 1))
        {
            continue;
        }
        match text.as_bytes()[index] {
            byte if byte == open => depth += 1,
            byte if byte == close => depth = depth.saturating_sub(1),
            byte if byte == separator && depth == 0 => {
                fields.push(field_start..index);
                field_start = index + 1;
            }
            _ => (),
        }
    }
    fields.push(field_start..stop);
    fields
}
353
#[test]
#[allow(clippy::missing_panics_doc)]
fn test_split_with_brackets() {
    // The text, the range to split, and the expected fields when splitting on ',' outside of '[]'
    let cases: [(&str, Range<usize>, Vec<Range<usize>>); 3] = [
        (
            "23-CHEMMOD:+15.995,23-[MS, MS:1001524, fragment neutral loss, 63.998285]",
            0..72,
            vec![0..18, 19..72],
        ),
        (
            "0[MS,MS:1001876, modification probability, 0.1]|23[MS,MS:1001876, modification probability, 0.9]-UNIMOD:35",
            0..106,
            vec![0..106],
        ),
        (
            "0[,,,[,,]],,[,,l;]hj",
            0..20,
            vec![0..10, 11..11, 12..20],
        ),
    ];
    for (text, range, expected) in cases {
        assert_eq!(
            split_with_brackets(text, range, b',', b'[', b']'),
            expected
        );
    }
}
382
/// Get the next number, returns length in bytes and the number.
///
/// The number starts at `start` and consists of an optional sign (`+` or `-`) followed by ASCII
/// digits. If `allow_only_sign` is set a sign without any digits is seen as the number `1` or
/// `-1` with a length of one byte.
///
/// Returns `None` if there is no number at `start`, if `start` lies beyond the end of `chars`, or
/// if the number does not fit in an `isize`.
pub(crate) fn next_num(
    chars: &[u8],
    mut start: usize,
    allow_only_sign: bool,
) -> Option<(usize, isize)> {
    let number_start = start;
    let sign: Option<isize> = match chars.get(start) {
        Some(b'-') => Some(-1),
        Some(b'+') => Some(1),
        _ => None,
    };
    if sign.is_some() {
        start += 1;
    }
    let len = chars
        .get(start..)?
        .iter()
        .take_while(|c| c.is_ascii_digit())
        .count();
    if len == 0 {
        return if allow_only_sign {
            sign.map(|sign| (1, sign))
        } else {
            None
        };
    }
    let number_end = start + len;
    // The sign is parsed together with the digits, otherwise `isize::MIN` is rejected because its
    // magnitude on its own does not fit in an `isize`.
    // The text only contains an ASCII sign and ASCII digits so it is always valid UTF-8.
    let num: isize = std::str::from_utf8(&chars[number_start..number_end])
        .ok()?
        .parse()
        .ok()?;
    Some((number_end - number_start, num))
}
421
/// A number of characters, used either as a length or as an index.
/// This is a plain alias for `usize`, so the compiler does not tell it apart from other `usize` values.
pub(crate) type Characters = usize;
424
425/// Get the next number starting at the byte range given, returns length in bytes, boolean indicating if the number is positive, and the number.
426/// # Errors
427/// Returns none if the number is too big to fit in a `Number`.
428pub(crate) fn next_number<const ALLOW_SIGN: bool, const FLOATING_POINT: bool, Number: FromStr>(
429    line: &str,
430    range: impl RangeBounds<usize>,
431) -> Option<(usize, bool, Result<Number, Number::Err>)> {
432    let start = range.start_index();
433    let end = range.end_index(line.len() - 1);
434    let mut positive = true;
435    let mut sign_set = false;
436    let mut chars = line[start..=end].char_indices().peekable();
437    if ALLOW_SIGN {
438        match chars.peek() {
439            Some((_, '-')) => {
440                positive = false;
441                sign_set = true;
442            }
443            Some((_, '+')) => {
444                sign_set = true;
445            }
446            _ => (),
447        }
448        if sign_set {
449            let _ = chars.next();
450        }
451    }
452
453    let mut consumed = usize::from(sign_set);
454    chars
455        .take_while(|(_, c)| {
456            if c.is_ascii_digit() || (FLOATING_POINT && ".eE+-".contains(*c)) {
457                consumed += 1;
458                true
459            } else {
460                false
461            }
462        })
463        .last()
464        .map(|(end_index, c)| {
465            (
466                consumed,
467                positive,
468                line[start..start + end_index + c.len_utf8()].parse::<Number>(),
469            )
470        })
471}
472
/// Get a canonicalised u64 for f64 to be able to hash f64, based on the `ordered_float` crate (MIT license).
/// All NaN values give the same bits, and negative and positive zero give the same bits.
pub(crate) fn f64_bits(value: f64) -> u64 {
    const CANONICAL_NAN_BITS: u64 = 0x7ff8_0000_0000_0000;
    if value.is_nan() {
        return CANONICAL_NAN_BITS;
    }
    // Adding positive zero turns negative zero into positive zero and leaves all other values as is
    let canonical = value + 0.0;
    canonical.to_bits()
}
481
/// Merge two hashmaps by multiplying the values of `two` into the values of `one`.
///
/// Keys that are only present in `one` keep their value. For all keys in `two` the value already
/// present (or `V::default()` if the key is not present in `one`) is multiplied by the value from
/// `two`.
pub(crate) fn merge_hashmap<K, V>(one: HashMap<K, V>, two: HashMap<K, V>) -> HashMap<K, V>
where
    V: std::ops::MulAssign + Default,
    K: Eq + Hash,
{
    two.into_iter().fold(one, |mut merged, (key, factor)| {
        *merged.entry(key).or_default() *= factor;
        merged
    })
}
494
/// Implement a binary operator for all remaining combinations of owned and borrowed operands.
/// This has to be used after the implementation for the ref-ref case, as all generated
/// implementations borrow both operands and forward to that implementation.
macro_rules! impl_binop_ref_cases {
    (impl $op:ident, $func:ident for $lhs:ty, $rhs:ty, $out:ty) => {
        // owned op owned
        impl $op<$rhs> for $lhs {
            type Output = $out;

            #[inline]
            fn $func(self, rhs: $rhs) -> $out {
                $op::$func(&self, &rhs)
            }
        }

        // owned op borrowed
        impl<'rhs> $op<&'rhs $rhs> for $lhs {
            type Output = $out;

            #[inline]
            fn $func(self, rhs: &'rhs $rhs) -> $out {
                $op::$func(&self, rhs)
            }
        }

        // borrowed op owned
        impl $op<$rhs> for &'_ $lhs {
            type Output = $out;

            #[inline]
            fn $func(self, rhs: $rhs) -> $out {
                $op::$func(self, &rhs)
            }
        }
    };
}
526
/// Explain in human readable text why parsing an integer failed.
/// To be used as `The xx number ` + the explanation from here (does not have a dot).
pub(crate) const fn explain_number_error(error: &ParseIntError) -> &'static str {
    match error.kind() {
        // Problems with the text itself
        IntErrorKind::Empty => "is empty",
        IntErrorKind::InvalidDigit => "contains an invalid character",
        // Problems with the value of the number
        IntErrorKind::Zero => "is zero, which is not allowed here",
        IntErrorKind::PosOverflow => "is too big to fit in the internal representation",
        IntErrorKind::NegOverflow => "is too small to fit in the internal representation",
        // The error kind is non exhaustive, so give a general explanation for any other kind
        _ => "is not a valid number",
    }
}
538
/// Check if two strings are equal, if `ignore_casing` is set the casing of ASCII letters is ignored
pub(crate) fn str_eq(a: &str, b: &str, ignore_casing: bool) -> bool {
    if !ignore_casing {
        return a == b;
    }
    a.eq_ignore_ascii_case(b)
}
547
/// Check if 'a' starts with 'b', if `ignore_casing` is set the casing of ASCII letters is ignored
pub(crate) fn str_starts_with(a: &str, b: &str, ignore_casing: bool) -> bool {
    if ignore_casing {
        // If 'a' starts with 'b' the matching prefix has the same length in bytes as 'b', because
        // ignoring ASCII casing never changes the encoded length of a character
        a.get(..b.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(b))
    } else {
        a.starts_with(b)
    }
}
557
#[allow(clippy::missing_panics_doc)]
#[test]
fn starts_with() {
    // The prefix to look for in "aaabbb", whether casing is ignored, and the expected outcome
    let cases = [
        ("a", false, true),
        ("aa", false, true),
        ("aaa", false, true),
        ("b", false, false),
        ("ab", false, false),
        ("aab", false, false),
        ("a", true, true),
        ("aa", true, true),
        ("aaa", true, true),
        ("A", true, true),
        ("AA", true, true),
        ("AAA", true, true),
        ("aaA", true, true),
        ("A", false, false),
        ("AA", false, false),
        ("AAA", false, false),
        ("aaA", false, false),
    ];
    for (prefix, ignore_casing, expected) in cases {
        assert_eq!(str_starts_with("aaabbb", prefix, ignore_casing), expected);
    }
}