1#![allow(dead_code)]
2
3use std::{
4 collections::HashMap,
5 hash::Hash,
6 num::{IntErrorKind, ParseIntError},
7 ops::{Bound, Range, RangeBounds},
8 path::Path,
9 str::FromStr,
10};
11
12use crate::sequence::SequencePosition;
13
14pub(crate) fn peptide_range_contains(
15 range: &impl RangeBounds<usize>,
16 peptide_length: usize,
17 position: SequencePosition,
18) -> bool {
19 match position {
20 SequencePosition::NTerm => range.start_index() == 0,
21 SequencePosition::Index(i) => range.contains(&i),
22 SequencePosition::CTerm => range.end_index(peptide_length) == peptide_length,
23 }
24}
25
/// Flatten a `Result` that carries another `Result` in one of its variants.
pub(crate) trait ResultExtensions<T, E> {
    /// Collapse the nested `Result` into a plain `Result<T, E>`.
    fn flat_err(self) -> Result<T, E>;
}

/// The nested `Result` sits in the error variant: an outer `Ok` stays `Ok`,
/// an outer `Err` is replaced by the `Result` it holds.
impl<T, E> ResultExtensions<T, E> for Result<T, Result<T, E>> {
    fn flat_err(self) -> Result<T, E> {
        self.or_else(|nested| nested)
    }
}

/// The nested `Result` sits in the success variant: an outer `Err` stays `Err`,
/// an outer `Ok` is replaced by the `Result` it holds.
impl<T, E> ResultExtensions<T, E> for Result<Result<T, E>, E> {
    fn flat_err(self) -> Result<T, E> {
        self.and_then(|nested| nested)
    }
}
49
/// Turn an optional fallible value inside out, so the error can be propagated
/// while the absence of a value is kept as `None`.
pub(crate) trait InvertResult<T, E> {
    /// Move the `Result` to the outside: an error stays an error, everything
    /// else becomes `Ok` of an `Option`.
    fn invert(self) -> Result<Option<T>, E>;
}

/// `None` becomes `Ok(None)`, `Some(Ok(v))` becomes `Ok(Some(v))`, `Some(Err(e))` becomes `Err(e)`.
impl<T, E> InvertResult<T, E> for Option<Result<T, E>> {
    fn invert(self) -> Result<Option<T>, E> {
        self.transpose()
    }
}

/// Both `None` and `Some(None)` become `Ok(None)`; the inner `Result` is moved outside.
impl<T, E> InvertResult<T, E> for Option<Option<Result<T, E>>> {
    fn invert(self) -> Result<Option<T>, E> {
        self.flatten().transpose()
    }
}

/// `None` becomes `Ok(None)`; a present `Result` is returned as is.
impl<T, E> InvertResult<T, E> for Option<Result<Option<T>, E>> {
    fn invert(self) -> Result<Option<T>, E> {
        self.transpose().map(Option::flatten)
    }
}
72
/// Resolve the bounds of any `usize` range into concrete indices.
pub(crate) trait RangeExtension
where
    Self: Sized,
{
    /// The first index covered by the range; an unbounded start is index 0.
    fn start_index(&self) -> usize;
    /// The last index covered by the range, never larger than `upper_bound`;
    /// an unbounded end is `upper_bound` itself.
    fn end_index(&self, upper_bound: usize) -> usize;
    /// Both indices at once, as `(start_index, end_index)`.
    fn bounds(&self, upper_bound: usize) -> (usize, usize) {
        let first = self.start_index();
        let last = self.end_index(upper_bound);
        (first, last)
    }
}

impl<R: RangeBounds<usize>> RangeExtension for R {
    fn start_index(&self) -> usize {
        match self.start_bound() {
            Bound::Included(&first) => first,
            Bound::Excluded(&before) => before + 1,
            Bound::Unbounded => 0,
        }
    }

    fn end_index(&self, upper_bound: usize) -> usize {
        let last = match self.end_bound() {
            Bound::Included(&last) => last,
            // An exclusive end of 0 has no index before it, so it saturates at 0.
            Bound::Excluded(&after) => after.saturating_sub(1),
            Bound::Unbounded => return upper_bound,
        };
        last.min(upper_bound)
    }
}
102
/// Saturating arithmetic on one end of a range.
///
/// Every method returns a new range and leaves `self` untouched. The end that is
/// not being moved is adjusted where needed so that `start <= end` still holds
/// whenever it held before.
pub(crate) trait RangeMaths<Other>
where
    Self: Sized,
{
    /// Add `amount` to the start; the end is raised to the new start if it would fall behind.
    fn add_start(&self, amount: Other) -> Self;
    /// Add `amount` to the end; the start is lowered to the new end if it would overtake it.
    fn add_end(&self, amount: Other) -> Self;
    /// Subtract `amount` from the start; the end is raised to the new start if it would fall behind.
    fn sub_start(&self, amount: Other) -> Self;
    /// Subtract `amount` from the end; the start is lowered to the new end if it would overtake it.
    fn sub_end(&self, amount: Other) -> Self;
}

/// Subtract a signed `amount` from `value`, saturating at the bounds of `usize`.
///
/// Negating the amount first (`-amount`) overflows for `isize::MIN`, so the
/// magnitude is applied in the opposite direction instead.
fn saturating_sub_signed(value: usize, amount: isize) -> usize {
    if amount.is_negative() {
        value.saturating_add(amount.unsigned_abs())
    } else {
        value.saturating_sub(amount.unsigned_abs())
    }
}

impl RangeMaths<isize> for Range<usize> {
    fn add_start(&self, amount: isize) -> Self {
        let new_start = self.start.saturating_add_signed(amount);
        Self {
            start: new_start,
            end: self.end.max(new_start),
        }
    }
    fn add_end(&self, amount: isize) -> Self {
        let new_end = self.end.saturating_add_signed(amount);
        Self {
            start: self.start.min(new_end),
            end: new_end,
        }
    }
    fn sub_start(&self, amount: isize) -> Self {
        let new_start = saturating_sub_signed(self.start, amount);
        Self {
            start: new_start,
            end: self.end.max(new_start),
        }
    }
    fn sub_end(&self, amount: isize) -> Self {
        let new_end = saturating_sub_signed(self.end, amount);
        Self {
            start: self.start.min(new_end),
            end: new_end,
        }
    }
}

impl RangeMaths<usize> for Range<usize> {
    fn add_start(&self, amount: usize) -> Self {
        let new_start = self.start.saturating_add(amount);
        Self {
            start: new_start,
            end: self.end.max(new_start),
        }
    }
    fn add_end(&self, amount: usize) -> Self {
        let new_end = self.end.saturating_add(amount);
        Self {
            start: self.start.min(new_end),
            end: new_end,
        }
    }
    fn sub_start(&self, amount: usize) -> Self {
        // Subtracting moves the start towards 0; it saturates there instead of wrapping.
        let new_start = self.start.saturating_sub(amount);
        Self {
            start: new_start,
            end: self.end.max(new_start),
        }
    }
    fn sub_end(&self, amount: usize) -> Self {
        let new_end = self.end.saturating_sub(amount);
        Self {
            start: self.start.min(new_end),
            end: new_end,
        }
    }
}
174
/// Parse a sequence of names that are each optionally followed by a count, e.g. `Hex5Fuc`.
///
/// * `value` - the text to parse; spaces between entries are skipped.
/// * `names` - the recognised names with the item each one stands for. They are
///   tried in slice order and the first one that is a prefix of the remaining text wins.
/// * `allow_negative` - whether `-` is accepted as part of a count.
///
/// A name without a count gets the count 1. ASCII whitespace between a name and
/// its count is allowed.
///
/// # Errors
/// When the text at some point does not start with any of the names, or when the
/// characters collected as count do not form a valid number.
pub(crate) fn parse_named_counter<T: Clone>(
    value: &str,
    names: &[(String, T)],
    allow_negative: bool,
) -> Result<Vec<(T, isize)>, String> {
    let mut index = 0;
    let mut output = Vec::new();
    while index < value.len() {
        let rest = &value[index..];
        if rest.starts_with(' ') {
            index += 1;
            continue;
        }
        let Some((name, item)) = names
            .iter()
            .find(|(name, _)| rest.starts_with(name.as_str()))
        else {
            return Err(format!("Name not recognised {}", &value[index..]));
        };
        index += name.len();

        // Everything below looks at the text directly after the name.
        let tail = &value[index..];
        let gap = tail.chars().take_while(char::is_ascii_whitespace).count();
        let num: String = tail
            .chars()
            .skip(gap)
            .take_while(|c| c.is_ascii_digit() || (allow_negative && *c == '-'))
            .collect();
        if num.is_empty() {
            // The whitespace is left in place here; the outer loop only skips spaces.
            output.push((item.clone(), 1));
        } else {
            output.push((
                item.clone(),
                num.parse()
                    .map_err(|_| format!("Not a valid number '{num}'"))?,
            ));
            // Both the whitespace and the count are ASCII, so chars equal bytes.
            index += num.len() + gap;
        }
    }
    Ok(output)
}
224
/// Split a string on ASCII whitespace, keeping track of where each chunk starts.
///
/// Returns every maximal run of non-whitespace characters together with the
/// byte offset of its first character in `input`. Leading, trailing and repeated
/// whitespace never produces empty chunks.
///
/// Offsets are byte offsets, so chunks containing multi-byte characters are
/// sliced on their real boundaries.
pub(crate) fn split_ascii_whitespace(input: &str) -> Vec<(usize, &str)> {
    let mut chunks = Vec::new();
    // Byte offset of the chunk currently being read, if any.
    let mut chunk_start = None;
    for (offset, c) in input.char_indices() {
        if c.is_ascii_whitespace() {
            if let Some(start) = chunk_start.take() {
                chunks.push((start, &input[start..offset]));
            }
        } else if chunk_start.is_none() {
            chunk_start = Some(offset);
        }
    }
    // A chunk that runs until the end of the input is not closed by whitespace.
    if let Some(start) = chunk_start {
        chunks.push((start, &input[start..]));
    }
    chunks
}
243
/// Check whether `filename` has the given `extension`, ignoring ASCII casing.
///
/// A filename without an extension never matches.
pub(crate) fn check_extension(filename: impl AsRef<Path>, extension: impl AsRef<Path>) -> bool {
    match filename.as_ref().extension() {
        Some(found) => found.eq_ignore_ascii_case(extension.as_ref()),
        None => false,
    }
}
251
/// Find the first byte equal to `char` at or after `start`.
///
/// Returns the index into `chars` (not relative to `start`), or `None` if the
/// byte does not occur. This compares raw bytes only.
///
/// # Panics
/// If `start` is larger than the length of `chars`.
pub(crate) fn next_char(chars: &[u8], start: usize, char: u8) -> Option<usize> {
    chars[start..]
        .iter()
        .position(|&byte| byte == char)
        .map(|offset| start + offset)
}
261
/// Find the closing delimiter that ends an enclosure, taking nesting into account.
///
/// The nesting depth starts at 1, so the search behaves as if one `open` was
/// already seen before `start`. Every `open` raises the depth, every `close`
/// lowers it, and the byte index of the `close` that brings it to 0 is returned.
/// Returns `None` if the text ends first.
///
/// Only bytes that form a complete single-byte character are inspected; bytes
/// that are part of a multi-byte character are skipped.
///
/// # Panics
/// If `start` is larger than the length of `text`.
pub(crate) fn end_of_enclosure(text: &str, start: usize, open: u8, close: u8) -> Option<usize> {
    let mut depth = 1;
    for (&byte, position) in text.as_bytes()[start..].iter().zip(start..) {
        // `is_char_boundary` also accepts the index equal to the length of the text.
        let whole_char = text.is_char_boundary(position) && text.is_char_boundary(position + 1);
        if !whole_char {
            continue;
        }
        if byte == open {
            depth += 1;
        } else if byte == close {
            depth -= 1;
            if depth == 0 {
                return Some(position);
            }
        }
    }
    None
}
280
281pub(crate) fn end_of_enclosure_with_brackets(
284 text: &str,
285 start: usize,
286 open: u8,
287 close: u8,
288) -> Option<usize> {
289 let mut state = 1;
290 let mut index = start;
291 while index < text.len() {
292 if !text.is_char_boundary(index) {
293 index += 1;
294 continue;
295 }
296 if index + 1 < text.len() && !text.is_char_boundary(index + 1) {
297 index += 1;
298 continue;
299 }
300 let ch = text.as_bytes()[index];
301 if ch == b'[' {
302 index = end_of_enclosure(text, index + 1, b'[', b']')?;
303 }
304 if ch == open {
305 state += 1;
306 } else if ch == close {
307 state -= 1;
308 if state == 0 {
309 return Some(index);
310 }
311 }
312 index += 1;
313 }
314 None
315}
316
/// Split a section of `text` on `separator`, ignoring separators inside an enclosure.
///
/// * `range` - the byte range of `text` to look at.
/// * `separator` - the byte to split on.
/// * `open` / `close` - the bytes that start and end an enclosure; enclosures may nest,
///   and a `close` without a matching `open` is ignored.
///
/// Returns the byte range of every field, without the separators. There is
/// always at least one field, and fields can be empty.
///
/// Only bytes that form a complete single-byte character are inspected.
pub(crate) fn split_with_brackets(
    text: &str,
    range: Range<usize>,
    separator: u8,
    open: u8,
    close: u8,
) -> Vec<Range<usize>> {
    let bytes = text.as_bytes();
    let mut depth: usize = 0;
    let mut field_start = range.start;
    let mut fields = Vec::new();
    for position in range.start..range.end {
        let starts_char = text.is_char_boundary(position);
        let ends_char = position + 1 >= text.len() || text.is_char_boundary(position + 1);
        if !(starts_char && ends_char) {
            continue;
        }
        let byte = bytes[position];
        if byte == open {
            depth += 1;
        } else if byte == close {
            depth = depth.saturating_sub(1);
        } else if byte == separator && depth == 0 {
            fields.push(field_start..position);
            field_start = position + 1;
        }
    }
    // The last field runs up to where the scan stopped; for a reversed range
    // nothing was scanned and that is the start of the range.
    fields.push(field_start..range.end.max(range.start));
    fields
}
353
/// Separators inside (nested) square brackets must not split a field, while
/// consecutive separators outside of brackets give empty fields.
#[test]
#[allow(clippy::missing_panics_doc)]
fn test_split_with_brackets() {
    // Every case splits on commas from the start of the text up to `end`.
    let split = |text: &str, end: usize| split_with_brackets(text, 0..end, b',', b'[', b']');

    assert_eq!(
        split(
            "23-CHEMMOD:+15.995,23-[MS, MS:1001524, fragment neutral loss, 63.998285]",
            72
        ),
        vec![0..18, 19..72]
    );
    assert_eq!(
        split(
            "0[MS,MS:1001876, modification probability, 0.1]|23[MS,MS:1001876, modification probability, 0.9]-UNIMOD:35",
            106
        ),
        vec![0..106]
    );
    assert_eq!(
        split("0[,,,[,,]],,[,,l;]hj", 20),
        vec![0..10, 11..11, 12..20]
    );
}
382
/// Read an integer, with an optional leading `+` or `-`, from `chars` at `start`.
///
/// Returns the number of bytes used (sign included) and the signed value.
/// If `allow_only_sign` is set, a sign without any digits is read as 1 or -1
/// with one byte used. Returns `None` if there is no number at `start` or the
/// digits do not fit in an `isize`.
///
/// # Panics
/// If `start` is larger than the length of `chars`.
pub(crate) fn next_num(
    chars: &[u8],
    mut start: usize,
    allow_only_sign: bool,
) -> Option<(usize, isize)> {
    let explicit_sign: Option<isize> = match chars.get(start) {
        Some(b'-') => Some(-1),
        Some(b'+') => Some(1),
        _ => None,
    };
    if explicit_sign.is_some() {
        start += 1;
    }
    let sign = explicit_sign.unwrap_or(1);

    let digits = chars[start..]
        .iter()
        .take_while(|byte| byte.is_ascii_digit())
        .count();
    if digits == 0 {
        return if allow_only_sign && explicit_sign.is_some() {
            Some((1, sign))
        } else {
            None
        };
    }

    // The slice holds ASCII digits only, so it is always valid UTF-8.
    let magnitude: isize = std::str::from_utf8(&chars[start..start + digits])
        .unwrap()
        .parse()
        .ok()?;
    Some((usize::from(explicit_sign.is_some()) + digits, sign * magnitude))
}
421
/// Alias for `usize`.
// NOTE(review): presumably used to mark values that count characters rather
// than bytes; it is not referenced in this part of the file - confirm against its users.
pub(crate) type Characters = usize;
424
425pub(crate) fn next_number<const ALLOW_SIGN: bool, const FLOATING_POINT: bool, Number: FromStr>(
429 line: &str,
430 range: impl RangeBounds<usize>,
431) -> Option<(usize, bool, Result<Number, Number::Err>)> {
432 let start = range.start_index();
433 let end = range.end_index(line.len() - 1);
434 let mut positive = true;
435 let mut sign_set = false;
436 let mut chars = line[start..=end].char_indices().peekable();
437 if ALLOW_SIGN {
438 match chars.peek() {
439 Some((_, '-')) => {
440 positive = false;
441 sign_set = true;
442 }
443 Some((_, '+')) => {
444 sign_set = true;
445 }
446 _ => (),
447 }
448 if sign_set {
449 let _ = chars.next();
450 }
451 }
452
453 let mut consumed = usize::from(sign_set);
454 chars
455 .take_while(|(_, c)| {
456 if c.is_ascii_digit() || (FLOATING_POINT && ".eE+-".contains(*c)) {
457 consumed += 1;
458 true
459 } else {
460 false
461 }
462 })
463 .last()
464 .map(|(end_index, c)| {
465 (
466 consumed,
467 positive,
468 line[start..start + end_index + c.len_utf8()].parse::<Number>(),
469 )
470 })
471}
472
/// Get a canonical bit pattern for an `f64`, so that values that should be
/// treated as the same also give the same bits.
///
/// Every NaN gives the same bits, and negative zero gives the bits of positive zero.
pub(crate) fn f64_bits(value: f64) -> u64 {
    /// The single bit pattern reported for every NaN.
    const CANONICAL_NAN_BITS: u64 = 0x7ff8_0000_0000_0000_u64;

    if !value.is_nan() {
        // Adding positive zero turns -0.0 into +0.0 and leaves all other values as they are.
        return (value + 0.0).to_bits();
    }
    CANONICAL_NAN_BITS
}
481
/// Merge two hashmaps by multiplying the values of `two` into `one`.
///
/// For every entry of `two` the value stored in the result under that key is
/// multiplied by the value from `two`. Keys that only occur in `one` are kept as they are.
// NOTE(review): a key that only occurs in `two` starts from `V::default()` before the
// multiplication, which is zero for the numeric types - confirm that this is intended.
pub(crate) fn merge_hashmap<K, V>(one: HashMap<K, V>, two: HashMap<K, V>) -> HashMap<K, V>
where
    V: std::ops::MulAssign + Default,
    K: Eq + Hash,
{
    two.into_iter().fold(one, |mut merged, (key, factor)| {
        *merged.entry(key).or_default() *= factor;
        merged
    })
}
494
/// Implement a binary operator for the owned/borrowed operand combinations that
/// are left once the `&$t` with `&$u` case exists.
///
/// Invoked as `impl_binop_ref_cases!(impl Trait, method for Left, Right, Output)`.
/// All three generated impls forward to `$imp::$method` with both operands
/// borrowed, so that reference-reference implementation has to be provided
/// separately; this macro does not generate it.
macro_rules! impl_binop_ref_cases {
    (impl $imp:ident, $method:ident for $t:ty, $u:ty, $o:ty) => {
        // Borrowed left-hand side, owned right-hand side.
        impl $imp<$u> for &'_ $t {
            type Output = $o;

            #[inline]
            fn $method(self, other: $u) -> $o {
                $imp::$method(self, &other)
            }
        }

        // Owned left-hand side, borrowed right-hand side.
        impl<'a> $imp<&'a $u> for $t {
            type Output = $o;

            #[inline]
            fn $method(self, other: &'a $u) -> $o {
                $imp::$method(&self, other)
            }
        }

        // Both sides owned.
        impl $imp<$u> for $t {
            type Output = $o;

            #[inline]
            fn $method(self, other: $u) -> $o {
                $imp::$method(&self, &other)
            }
        }
    };
}
526
/// Describe why parsing an integer failed.
///
/// The text is a sentence fragment that starts with a verb (for example
/// `is empty`), so it reads naturally after a subject. Error kinds without a
/// specific description fall back to a generic one.
pub(crate) const fn explain_number_error(error: &ParseIntError) -> &'static str {
    let kind = error.kind();
    match kind {
        IntErrorKind::PosOverflow => "is too big to fit in the internal representation",
        IntErrorKind::NegOverflow => "is too small to fit in the internal representation",
        IntErrorKind::InvalidDigit => "contains an invalid character",
        IntErrorKind::Empty => "is empty",
        IntErrorKind::Zero => "is zero, which is not allowed here",
        _ => "is not a valid number",
    }
}
538
/// Check two strings for equality, optionally ignoring ASCII casing.
pub(crate) fn str_eq(a: &str, b: &str, ignore_casing: bool) -> bool {
    // Strings that are exactly equal are also equal when casing is ignored.
    a == b || (ignore_casing && a.eq_ignore_ascii_case(b))
}
547
/// Check whether `a` starts with `b`, optionally ignoring ASCII casing.
pub(crate) fn str_starts_with(a: &str, b: &str, ignore_casing: bool) -> bool {
    let same = |left: char, right: char| {
        if ignore_casing {
            left.eq_ignore_ascii_case(&right)
        } else {
            left == right
        }
    };
    // `zip` stops at the shorter string, so the length check rules out a `b`
    // that is longer than `a`.
    a.len() >= b.len()
        && a.chars()
            .zip(b.chars())
            .all(|(left, right)| same(left, right))
}
557
/// Prefixes of differing length and casing, checked with and without ignoring the casing.
#[allow(clippy::missing_panics_doc)]
#[test]
fn starts_with() {
    // (prefix, ignore_casing, expected result) for the text "aaabbb"
    let cases = [
        ("a", false, true),
        ("aa", false, true),
        ("aaa", false, true),
        ("b", false, false),
        ("ab", false, false),
        ("aab", false, false),
        ("a", true, true),
        ("aa", true, true),
        ("aaa", true, true),
        ("A", true, true),
        ("AA", true, true),
        ("AAA", true, true),
        ("aaA", true, true),
        ("A", false, false),
        ("AA", false, false),
        ("AAA", false, false),
        ("aaA", false, false),
    ];
    for (prefix, ignore_casing, expected) in cases {
        assert!(str_starts_with("aaabbb", prefix, ignore_casing) == expected);
    }
}