use crate::transformations::calc::extract_numeric_tags_plus_all;
use crate::transformations::prelude::*;
use fastqrab_config::tpd_adapt_bstring_uppercase;
// Configuration and precomputed state for the base-content step: for each read
// it counts how many bases belong to `bases_to_count` and stores the result as a
// numeric tag under `out_label` (see `Step::apply` below).
//
// NOTE(review): plain `//` comments are used here on purpose. `///` doc comments
// on a `derive(JsonSchema)` type are emitted by schemars as schema descriptions,
// so switching to `///` would change the generated schema.
#[derive(Clone, JsonSchema)]
#[tpd]
#[derive(Debug)]
pub struct BaseContent {
// Label of the tag this step declares (`get_tag_usage`) and fills (`apply`).
pub out_label: TagLabel,
// Segment selector handed to `extract_numeric_tags_plus_all`.
// Presumably given as a string in the configuration and adapted to
// `SegmentIndexOrAll` during verification (`validate_segment`) — TODO confirm.
#[schemars(with = "String")]
#[tpd(adapt_in_verify(String))]
segment: SegmentIndexOrAll,
// true: the tag value is counted / considered (0.0 when nothing was considered).
// false: the tag value is the absolute number of counted bases.
// `verify` supplies `true` when the field is not set.
pub relative: bool,
// Bases that are counted. `verify` rejects an empty value.
// Presumably uppercased on load by `tpd_adapt_bstring_uppercase` — TODO confirm.
#[schemars(with = "String")]
#[tpd(with = "tpd_adapt_bstring_uppercase")]
pub bases_to_count: BString,
// Bases that are skipped entirely: they are neither counted nor considered.
// `verify` only accepts this field when `relative` is true and when every byte
// is an ASCII letter; it falls back to an empty string when absent.
#[tpd(default)]
#[schemars(with = "String")]
#[tpd(with = "tpd_adapt_bstring_uppercase")]
pub bases_to_ignore: BString,
// 256-entry table indexed by byte value, built from `bases_to_count` by
// `build_lookup`. Skipped by both `tpd` and the schema; filled in `verify`.
#[tpd(skip)]
#[schemars(skip)]
bases_to_count_lookup: Vec<bool>,
// Same kind of table as above, built from `bases_to_ignore`.
#[tpd(skip)]
#[schemars(skip)]
bases_to_ignore_lookup: Vec<bool>,
}
/// Builds a 256-entry membership table for `bases`.
///
/// Entry `b` of the returned vector is `true` exactly when the byte value `b`
/// occurs in `bases`; every other entry is `false`. The table always has 256
/// entries, so any `u8` can be used as an index without going out of bounds.
fn build_lookup(bases: &BString) -> Vec<bool> {
    let mut table = vec![false; 256];
    bases
        .as_slice()
        .iter()
        .for_each(|&byte| table[usize::from(byte)] = true);
    table
}
impl VerifyIn<PartialConfig> for PartialBaseContent {
    /// Validates the partially deserialized step and fills in derived state.
    ///
    /// Steps, in order:
    /// 1. default `relative` to `true`,
    /// 2. validate `segment` against the parent configuration,
    /// 3. require `bases_to_count` to be non-empty,
    /// 4. require `bases_to_ignore` to be used only with `relative = true` and
    ///    to consist of ASCII letters only, then default it to an empty string,
    /// 5. build the 256-entry lookup tables for whichever base lists are present.
    ///
    /// Field-level problems are reported through the per-field `verify`
    /// callbacks; this function itself always returns `Ok(())`.
    fn verify(
        &mut self,
        parent: &PartialConfig,
        _options: &VerifyOptions,
    ) -> std::result::Result<(), ValidationFailure>
    where
        Self: Sized + toml_pretty_deser::Visitor,
    {
        // Must run before the `bases_to_ignore` check below, which reads `relative`.
        self.relative.or(true);
        self.segment.validate_segment(parent);
        self.bases_to_count.verify(|v| {
            if v.is_empty() {
                return Err(ValidationFailure::new(
                    "Must contain at least one letter (base)",
                    None,
                ));
            }
            Ok(())
        });
        self.bases_to_ignore.verify(|v| {
            if !self.relative.as_ref().is_some_and(|x| *x) {
                // This branch is reached when `relative` is not true, so the
                // actionable advice is to switch it to true (or drop the field).
                return Err(ValidationFailure::new(
                    "Bases can only be ignored in relative=true mode",
                    Some("Either set relative=true or remove the bases_to_ignore field"),
                ));
            }
            for letter in v.iter() {
                if !letter.is_ascii_alphabetic() {
                    return Err(ValidationFailure::new(
                        "Only ASCII letters are allowed as bases".to_string(),
                        Some(format!("Invalid character: '{}'", *letter as char)),
                    ));
                }
            }
            Ok(())
        });
        // Applied after validation so that an absent field is not subjected to
        // the `relative` check above — presumably `verify` only runs on values
        // that are present; TODO confirm.
        self.bases_to_ignore.or_with(BString::default);
        // The lookup fields are skipped during deserialization, so they are
        // derived here from whichever base lists are available.
        if let Some(bases_to_count) = self.bases_to_count.as_ref() {
            self.bases_to_count_lookup = Some(build_lookup(bases_to_count));
        }
        if let Some(bases_to_ignore) = self.bases_to_ignore.as_ref() {
            self.bases_to_ignore_lookup = Some(build_lookup(bases_to_ignore));
        }
        Ok(())
    }
}
impl PartialBaseContent {
    /// Assembles a `PartialBaseContent` directly from already-known values.
    ///
    /// `out_label` and `segment` are taken as given; `relative`,
    /// `bases_to_count` and `bases_to_ignore` are wrapped with
    /// `TomlValue::new_ok_unplaced`. Both lookup tables are derived from the
    /// base lists right away, so the result does not depend on `verify` to
    /// populate them.
    pub fn new(
        out_label: TomlValue<TagLabel>,
        segment: TomlValue<MustAdapt<String, SegmentIndexOrAll>>,
        relative: bool,
        bases_to_count: BString,
        bases_to_ignore: BString,
    ) -> PartialBaseContent {
        // Derive the tables first, while the base lists are still owned here.
        let count_table = build_lookup(&bases_to_count);
        let ignore_table = build_lookup(&bases_to_ignore);

        Self {
            out_label,
            segment,
            relative: TomlValue::new_ok_unplaced(relative),
            bases_to_count: TomlValue::new_ok_unplaced(bases_to_count),
            bases_to_ignore: TomlValue::new_ok_unplaced(bases_to_ignore),
            bases_to_count_lookup: Some(count_table),
            bases_to_ignore_lookup: Some(ignore_table),
        }
    }
}
impl BaseContent {
    /// Tallies one sequence against the two lookup tables.
    ///
    /// Every base is uppercased (ASCII) and used as an index into the tables.
    /// Bases flagged in `bases_to_ignore` are skipped completely; every other
    /// base is "considered", and those flagged in `bases_to_count` are also
    /// "counted".
    ///
    /// Returns `(considered, counted)`.
    ///
    /// # Panics
    ///
    /// Panics if a table is too short for an encountered byte value; tables
    /// produced by `build_lookup` have 256 entries and cannot trigger this.
    fn sequence_totals(
        sequence: &[u8],
        bases_to_count: &[bool],
        bases_to_ignore: &[bool],
    ) -> (usize, usize) {
        sequence
            .iter()
            .map(|base| usize::from(base.to_ascii_uppercase()))
            .filter(|&slot| !bases_to_ignore[slot])
            .fold((0usize, 0usize), |(considered, counted), slot| {
                (considered + 1, counted + usize::from(bases_to_count[slot]))
            })
    }

    /// Returns `counted / considered` as a fraction (not multiplied by 100),
    /// or `0.0` when `considered` is zero so that no division by zero occurs.
    #[expect(
        clippy::cast_precision_loss,
        reason = "loss is acceptable, it's going to be within u32 range"
    )]
    fn percentage(counted: usize, considered: usize) -> f64 {
        match considered {
            0 => 0.0,
            total => counted as f64 / total as f64,
        }
    }
}
impl TagUser for PartialTaggedVariant<PartialBaseContent> {
    /// Reports the tag this step declares.
    ///
    /// Returns `None` when the wrapped step has no value. Otherwise the step
    /// declares a numeric tag under `out_label`: bounded to `0.0..=1.0` when
    /// `relative` is set and true, with no bounds in every other case.
    fn get_tag_usage(
        &mut self,
        _tags_available: &IndexMap<TagLabel, TagMetadata>,
        _segment_order: &[String],
    ) -> Option<TagUsageInfo<'_>> {
        let inner = self.toml_value.value.as_mut()?;

        let is_relative = inner.relative.as_ref().is_some_and(|flag| *flag);
        let bounds = if is_relative {
            (
                Some(NonNaN::new(0.0).expect("can't fail")),
                Some(NonNaN::new(1.0).expect("can't fail")),
            )
        } else {
            (None, None)
        };

        Some(TagUsageInfo {
            declared_tag: inner
                .out_label
                .to_declared_tag(TagValueType::Numeric(bounds)),
            ..Default::default()
        })
    }
}
impl Step for BaseContent {
    /// Computes the base-content value and stores it under `out_label`.
    ///
    /// Two callbacks are handed to `extract_numeric_tags_plus_all`: one that
    /// evaluates a single read, and one that evaluates a group of reads by
    /// summing their totals before producing a single value. In relative mode
    /// the value is `counted / considered`; otherwise it is the counted total.
    ///
    /// Always returns the (modified) block together with `true`.
    #[expect(
        clippy::cast_precision_loss,
        reason = "loss is acceptable, it's going to be within u32 range"
    )]
    fn apply(
        &self,
        mut block: FastQBlocksCombined,
        _input_info: &InputInfo,
        _demultiplex_info: &OptDemultiplex,
    ) -> anyhow::Result<(FastQBlocksCombined, bool)> {
        let relative = self.relative;
        // Shared final step: turn raw totals into the tag value. Captures only a
        // `bool`, so it can be copied into both callbacks below.
        let finish = move |counted: usize, considered: usize| -> f64 {
            if relative {
                Self::percentage(counted, considered)
            } else {
                counted as f64
            }
        };

        // Both callbacks are `move` closures, so each gets its own copy of the tables.
        let count_per_read = self.bases_to_count_lookup.clone();
        let ignore_per_read = self.bases_to_ignore_lookup.clone();
        let count_combined = self.bases_to_count_lookup.clone();
        let ignore_combined = self.bases_to_ignore_lookup.clone();

        extract_numeric_tags_plus_all(
            self.segment,
            &self.out_label,
            move |read| {
                let (considered, counted) =
                    Self::sequence_totals(read.seq(), &count_per_read, &ignore_per_read);
                finish(counted, considered)
            },
            move |reads| {
                // Sum over all reads first, so the ratio is taken over pooled bases.
                let mut sum_considered = 0usize;
                let mut sum_counted = 0usize;
                for read in reads {
                    let (considered, counted) =
                        Self::sequence_totals(read.seq(), &count_combined, &ignore_combined);
                    sum_considered += considered;
                    sum_counted += counted;
                }
                finish(sum_counted, sum_considered)
            },
            &mut block,
        );
        Ok((block, true))
    }
}