use super::extract_region_tags;
use crate::transformations::prelude::*;
use fastqrab_config::{StringOrVecString, dna::Anchor, tpd_adapt_iupac_bstring};
// How `IUPAC::apply` settles on a single hit when several `search` patterns are configured.
#[derive(Clone, JsonSchema, Debug, Default)]
#[tpd]
enum TieBreak {
// Try the patterns in the order they were configured and keep the first one that
// produces a hit; later patterns are not searched. This is the default.
#[default]
Earliest,
// Search with every pattern and keep the hit with the smallest `location.start`.
LeftMost,
// Search with every pattern and keep the hit with the largest `location.start`.
RightMost,
}
// Configuration of a step that looks for IUPAC pattern(s) in one segment of each read
// and stores the hit under `out_label` as a location tag (see the `Step` and `TagUser`
// impls in this file).
#[derive(Clone, JsonSchema)]
#[tpd]
#[expect(clippy::upper_case_acronyms, reason = "Domain name")]
#[derive(Debug)]
pub struct IUPAC {
// The pattern(s) to search for. The schema advertises "string or list of strings";
// parsing goes through `tpd_adapt_iupac_bstring`.
// NOTE(review): the two `alias` attributes presumably let the key also be spelled
// `query` or `pattern` in the configuration — confirm against the `tpd` macro.
#[tpd(with = "tpd_adapt_iupac_bstring")]
#[tpd(alias = "query")]
#[tpd(alias = "pattern")]
#[schemars(with = "StringOrVecString")]
search: Vec<BString>,
// The segment to search. Shown as a string in the schema and checked against the
// parent configuration by `validate_segment` in `PartialIUPAC::verify`.
#[schemars(with = "String")]
#[tpd(adapt_in_verify(String))]
segment: SegmentIndex,
// Forwarded to `find_iupac`. `Anchor::Anywhere` is rejected during verification when
// `max_anchor_distance` is also set.
anchor: Anchor,
// Forwarded to `find_iupac`. `verify` calls `.or(0)` on it — presumably a fallback of
// 0 when the key is absent; confirm against `toml_pretty_deser`.
max_anchor_distance: usize,
// Label of the tag this step declares (declared with `TagValueType::Location`).
out_label: TagLabel,
// Forwarded to `find_iupac`; presumably the number of mismatching bases tolerated
// per hit — confirm against `find_iupac`.
max_mismatches: u8,
// Which hit to keep when several patterns are configured; falls back to the
// `TieBreak` default (`Earliest`) when omitted.
#[tpd(default)]
on_tie: TieBreak,
}
impl VerifyIn<PartialConfig> for PartialIUPAC {
    /// Cross-field validation of a partially parsed `IUPAC` step.
    ///
    /// * Validates `segment` against the parent configuration.
    /// * Flags the combination `anchor = anywhere` + an explicit `max_anchor_distance`
    ///   as an error, pointing at both keys and attaching a help text.
    /// * Finally calls `.or(0)` on `max_anchor_distance` (presumably supplying 0 when
    ///   the key was not given — confirm against `toml_pretty_deser`).
    ///
    /// Problems are recorded on the individual fields; the function itself always
    /// returns `Ok(())`.
    fn verify(
        &mut self,
        parent: &PartialConfig,
        _options: &VerifyOptions,
    ) -> std::result::Result<(), ValidationFailure>
    where
        Self: Sized + toml_pretty_deser::Visitor,
    {
        self.segment.validate_segment(parent);

        if let Some(Anchor::Anywhere) = self.anchor.as_ref()
            && self.max_anchor_distance.as_ref().is_some()
        {
            // Report the conflict on both keys so the user sees each side of it.
            let spans = vec![
                (
                    self.max_anchor_distance.span.clone(),
                    "Incompatible with anchor = anywhere".to_string(),
                ),
                (
                    self.anchor.span.clone(),
                    "Incompatible with max_anchor_distance set".to_string(),
                ),
            ];
            self.max_anchor_distance.state = TomlValueState::Custom { spans };
            self.max_anchor_distance.help = Some(
                "Either choose a different anchor, or remove max_anchor_distance, depending on your needs"
                    .to_string(),
            );
        }

        // Must stay after the conflict check above, which relies on seeing whether
        // the user set the key explicitly.
        self.max_anchor_distance.or(0);
        Ok(())
    }
}
impl TagUser for PartialTaggedVariant<PartialIUPAC> {
    /// Reports which tag this step declares.
    ///
    /// Returns `None` when the wrapped step has no parsed value; otherwise a
    /// `TagUsageInfo` whose only non-default entry is `out_label`, declared as a
    /// `TagValueType::Location` tag. Both arguments are unused.
    fn get_tag_usage(
        &mut self,
        _tags_available: &IndexMap<TagLabel, TagMetadata>,
        _segment_order: &[String],
    ) -> Option<TagUsageInfo<'_>> {
        // Nothing to declare if the step itself did not parse.
        let step = self.toml_value.value.as_mut()?;
        let declared_tag = step.out_label.to_declared_tag(TagValueType::Location);
        Some(TagUsageInfo {
            declared_tag,
            ..Default::default()
        })
    }
}
impl Step for IUPAC {
    /// Searches every read of `self.segment` for the configured pattern(s) and hands
    /// the selected hit (or `None`) to `extract_region_tags`, which stores it under
    /// `self.out_label`.
    ///
    /// Which hit is selected depends on `self.on_tie`:
    /// * `Earliest`  – the first pattern, in configuration order, that hits.
    /// * `LeftMost`  – the hit with the smallest `location.start`; if several hits
    ///   share it, the one from the earliest pattern wins.
    /// * `RightMost` – the hit with the largest `location.start`; if several hits
    ///   share it, the one from the latest pattern wins.
    ///
    /// Returns the block together with `true`.
    /// NOTE(review): the meaning of the boolean is defined by the `Step` trait, which
    /// is not visible here.
    ///
    /// # Panics
    /// With `LeftMost` / `RightMost`, if `find_iupac` returns a hit without a location.
    fn apply(
        &self,
        mut block: FastQBlocksCombined,
        _input_info: &InputInfo,
        _demultiplex_info: &OptDemultiplex,
    ) -> Result<(FastQBlocksCombined, bool)> {
        extract_region_tags(&mut block, self.segment, &self.out_label, |read| {
            // Lazy on purpose: a pattern is only searched for once the iterator is
            // advanced to it, so `Earliest` stops at the first hit.
            let mut hits = self.search.iter().filter_map(|pattern| {
                read.find_iupac(
                    pattern,
                    self.anchor,
                    self.max_mismatches,
                    self.segment,
                    self.max_anchor_distance,
                )
            });

            match self.on_tie {
                TieBreak::Earliest => hits.next(),
                TieBreak::LeftMost => hits.min_by_key(|hit| {
                    hit.location
                        .as_ref()
                        .expect("Found iupac should have had location set")
                        .start
                }),
                TieBreak::RightMost => hits.max_by_key(|hit| {
                    hit.location
                        .as_ref()
                        .expect("Found iupac should have had location set")
                        .start
                }),
            }
        });
        Ok((block, true))
    }
}