use crate::essentials_fields::MafRecord;
use crate::extract_ann_and_ann_names::extract_ann_regex;
use crate::extract_csq_and_csq_names::extract_csq_regex;
use crate::extract_sample_info::ParsedFormatSample;
use crate::get_info_from_header::{extract_ann_format_from_header, extract_csq_format_from_header};
use flate2::write::GzEncoder;
use flate2::Compression;
use rayon::prelude::*;
use std::collections::HashMap;
use std::fs::{create_dir_all, File};
use std::io::{BufWriter, Write};
use std::path::Path;
/// Selector for the kind of annotation to work with.
///
/// NOTE(review): this enum is not referenced in the visible part of the file.
/// The variant names suggest VEP, SnpEff and automatic detection; confirm
/// against the callers before relying on that reading.
#[derive(Debug, Clone, Copy)]
pub enum AnnotationType {
Vep,
SnpEff,
Auto,
}
/// Identifies which annotation value the parsed annotation records came from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnnotationFieldType {
/// Records were parsed from a `CSQ` value.
Csq,
/// Records were parsed from an `ANN` value.
Ann,
/// No annotation records were produced.
None,
}
/// Outcome of the annotation-parsing step for a single INFO string.
#[allow(dead_code)]
#[derive(Debug, Clone)]
struct AnnotationParseResult {
/// Which annotation value the records were parsed from.
field_type: AnnotationFieldType,
/// One key/value map per retained annotation entry; empty when nothing was parsed.
records: Vec<HashMap<String, String>>,
/// INFO text that is subsequently parsed as ordinary `key=value` pairs.
remaining_info: String,
}
/// Strategy for a variant whose annotation value holds several comma-separated entries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TranscriptHandling {
/// Keep only the entry ranked most severe.
MostSevere,
/// Keep only the first non-blank entry.
FirstOnly,
/// Keep every entry, producing one output record per entry.
SplitRows,
}
/// One flattened output row derived from a VCF data line.
#[derive(Debug, Clone)]
pub struct ReformattedVcfRecord {
/// Text of the first column.
pub chromosome: String,
/// Second column parsed as an unsigned integer.
pub position: u64,
/// Third column, or `None` when it is `"."`.
pub id: Option<String>,
/// Text of the fourth column.
pub reference: String,
/// Text of the fifth column, stored verbatim.
pub alternate: String,
/// Sixth column parsed as a float, or `None` when it is `"."`.
pub quality: Option<f64>,
/// Text of the seventh column.
pub filter: String,
/// Flattened INFO and annotation values, keyed by `INFO_`-, `CSQ_`- or `ANN_`-prefixed names.
pub info_fields: HashMap<String, String>,
/// Parsed FORMAT/sample columns; `None` when the line has no columns past the eighth.
pub format_sample_data: Option<ParsedFormatSample>,
}
impl ReformattedVcfRecord {
    /// Parses one tab-separated VCF data line into one or more records.
    ///
    /// The first eight columns are interpreted as CHROM, POS, ID, REF, ALT, QUAL,
    /// FILTER and INFO. Any further columns are handed to
    /// `ParsedFormatSample::from_vcf_fields` together with `column_names`.
    /// The INFO column is expanded by `parse_info_field`; every map it returns
    /// becomes one record that shares the line's fixed columns and sample data.
    ///
    /// # Errors
    ///
    /// Fails when the line has fewer than eight columns, when POS or a
    /// non-`"."` QUAL cannot be parsed as a number, or when sample or INFO
    /// parsing fails.
    pub fn from_vcf_line(
        line: &str,
        column_names: &[&str],
        csq_field_names: &Option<Vec<String>>,
        ann_field_names: &Option<Vec<String>>,
        transcript_handling: TranscriptHandling,
    ) -> std::result::Result<Vec<Self>, Box<dyn std::error::Error>> {
        let columns: Vec<&str> = line.split('\t').collect();
        if columns.len() < 8 {
            return Err("Invalid VCF line: insufficient fields".into());
        }
        // Split the mandatory columns from the optional FORMAT/sample columns.
        let (fixed, sample_columns) = columns.split_at(8);

        let chromosome = fixed[0].to_string();
        let position = match fixed[1].parse::<u64>() {
            Ok(value) => value,
            Err(e) => return Err(format!("Invalid position '{}': {}", fixed[1], e).into()),
        };
        let id = (fixed[2] != ".").then(|| fixed[2].to_string());
        let reference = fixed[3].to_string();
        let alternate = fixed[4].to_string();
        let quality = match fixed[5] {
            "." => None,
            raw => match raw.parse::<f64>() {
                Ok(value) => Some(value),
                Err(e) => return Err(format!("Invalid quality '{}': {}", raw, e).into()),
            },
        };
        let filter = fixed[6].to_string();

        let format_sample_data = if sample_columns.is_empty() {
            None
        } else {
            Some(ParsedFormatSample::from_vcf_fields(
                sample_columns,
                column_names,
            )?)
        };

        let info_variants =
            parse_info_field(fixed[7], csq_field_names, ann_field_names, transcript_handling)?;

        // One record per INFO variant, all sharing the same fixed columns.
        let mut records = Vec::with_capacity(info_variants.len());
        for info_fields in info_variants {
            records.push(Self {
                chromosome: chromosome.clone(),
                position,
                id: id.clone(),
                reference: reference.clone(),
                alternate: alternate.clone(),
                quality,
                filter: filter.clone(),
                info_fields,
                format_sample_data: format_sample_data.clone(),
            });
        }
        Ok(records)
    }
}
/// Expands a VCF INFO string into one or more flat key/value maps.
///
/// Annotation entries (CSQ or ANN) are parsed according to
/// `transcript_handling`; the rest of the INFO string is parsed as ordinary
/// `key=value` pairs and merged into every annotation map. When no annotation
/// records are produced, the result is a single map holding only the ordinary
/// INFO pairs. An empty `info` yields a single empty map.
///
/// # Errors
///
/// Propagates any error from annotation parsing or INFO-pair parsing.
pub fn parse_info_field(
    info: &str,
    csq_field_names: &Option<Vec<String>>,
    ann_field_names: &Option<Vec<String>>,
    transcript_handling: TranscriptHandling,
) -> std::result::Result<Vec<HashMap<String, String>>, Box<dyn std::error::Error>> {
    if info.is_empty() {
        return Ok(vec![HashMap::new()]);
    }
    let annotation_result =
        parse_annotation_fields(info, csq_field_names, ann_field_names, transcript_handling)?;
    let remaining_info_map = parse_remaining_info_fields(&annotation_result.remaining_info)?;
    // Decide the fallback up front so the INFO map can be moved rather than
    // cloned for every line.
    if annotation_result.records.is_empty() {
        return Ok(vec![remaining_info_map]);
    }
    Ok(combine_annotation_with_info(
        annotation_result.records,
        remaining_info_map,
    ))
}
/// Tries to pull structured annotation records out of an INFO string.
///
/// A `CSQ` value is attempted first and an `ANN` value second; the first one
/// that yields at least one parsed record wins. When neither does, the result
/// carries `AnnotationFieldType::None`, no records, and the untouched INFO
/// string. Parse failures are reported on stderr as warnings; every return
/// path of this function is `Ok`.
fn parse_annotation_fields(
info: &str,
csq_field_names: &Option<Vec<String>>,
ann_field_names: &Option<Vec<String>>,
transcript_handling: TranscriptHandling,
) -> Result<AnnotationParseResult, Box<dyn std::error::Error>> {
// The extractors receive a whole line as mutable column strings, so the INFO
// text is wrapped in placeholder columns (INFO ends up at index 7).
let mut parsed_lines = create_dummy_vcf_line(info);
// NOTE(review): `extract_csq_regex` presumably strips the CSQ entry from
// column 7 in place, which is why column 7 is returned as the remaining INFO
// below; confirm against the helper.
if let Some(csq_value) = extract_csq_regex(&mut parsed_lines) {
if let Some(field_names) = csq_field_names {
if !field_names.is_empty() && !csq_value.trim().is_empty() {
match parse_csq_field_with_handling(&csq_value, field_names, transcript_handling) {
Ok(records) if !records.is_empty() => {
return Ok(AnnotationParseResult {
field_type: AnnotationFieldType::Csq,
records,
remaining_info: parsed_lines[7].clone(),
});
}
// Parsed without error but produced nothing: fall through to ANN.
Ok(_) => {}
Err(e) => {
eprintln!("Warning: Failed to parse CSQ field: {e}");
}
}
}
}
}
// Put the original INFO text back before the ANN attempt, since the CSQ
// attempt was handed column 7 mutably.
parsed_lines[7] = info.to_string();
// NOTE(review): `extract_ann_regex` is assumed to behave like its CSQ
// counterpart for the ANN entry; confirm against the helper.
if let Some(ann_value) = extract_ann_regex(&mut parsed_lines) {
if let Some(field_names) = ann_field_names {
if !field_names.is_empty() && !ann_value.trim().is_empty() {
match parse_ann_field_with_handling(&ann_value, field_names, transcript_handling) {
Ok(records) if !records.is_empty() => {
return Ok(AnnotationParseResult {
field_type: AnnotationFieldType::Ann,
records,
remaining_info: parsed_lines[7].clone(),
});
}
Ok(_) => {}
Err(e) => {
eprintln!("Warning: Failed to parse ANN field: {e}");
}
}
}
}
}
// Neither annotation produced records: hand back the INFO string unchanged.
Ok(AnnotationParseResult {
field_type: AnnotationFieldType::None,
records: Vec::new(),
remaining_info: info.to_string(),
})
}
/// Builds an eight-column placeholder VCF line whose last column is `info`.
///
/// The first seven columns are fixed dummy values; only the INFO column
/// (index 7) carries caller data.
fn create_dummy_vcf_line(info: &str) -> Vec<String> {
    const PLACEHOLDER_COLUMNS: [&str; 7] = ["chr1", "100", ".", "A", "G", "60", "PASS"];
    let mut line: Vec<String> = PLACEHOLDER_COLUMNS
        .iter()
        .map(|column| column.to_string())
        .collect();
    line.push(info.to_owned());
    line
}
/// Parses a comma-separated CSQ value into annotation maps.
///
/// Blank entries are dropped first. Depending on `transcript_handling` the
/// result holds the first entry, the most severe entry, or every entry. In
/// `SplitRows` mode an entry that fails to parse is reported on stderr and
/// skipped; in the other modes the error is returned. A value with no usable
/// entries yields an empty vector.
fn parse_csq_field_with_handling(
    csq_value: &str,
    csq_field_names: &[String],
    transcript_handling: TranscriptHandling,
) -> Result<Vec<HashMap<String, String>>, Box<dyn std::error::Error>> {
    let entries: Vec<&str> = csq_value
        .split(',')
        .filter(|entry| !entry.trim().is_empty())
        .collect();
    if entries.is_empty() {
        return Ok(Vec::new());
    }

    let records = match transcript_handling {
        TranscriptHandling::SplitRows => entries
            .iter()
            .filter_map(
                |annotation| match parse_single_csq_annotation(annotation, csq_field_names) {
                    Ok(parsed) => Some(parsed),
                    Err(e) => {
                        eprintln!("Warning: Failed to parse CSQ annotation '{annotation}': {e}");
                        None
                    }
                },
            )
            .collect(),
        TranscriptHandling::MostSevere => {
            vec![find_most_severe_consequence(&entries, csq_field_names)?]
        }
        TranscriptHandling::FirstOnly => {
            let first = match entries.first() {
                Some(entry) => entry,
                None => return Err("No annotations found after filtering".into()),
            };
            vec![parse_single_csq_annotation(first, csq_field_names)?]
        }
    };
    Ok(records)
}
/// Parses a comma-separated ANN value into annotation maps.
///
/// Blank entries are dropped first. Depending on `transcript_handling` the
/// result holds the first entry, the most severe entry, or every entry. In
/// `SplitRows` mode an entry that fails to parse is reported on stderr and
/// skipped; in the other modes the error is returned. A value with no usable
/// entries yields an empty vector.
fn parse_ann_field_with_handling(
    ann_value: &str,
    ann_field_names: &[String],
    transcript_handling: TranscriptHandling,
) -> Result<Vec<HashMap<String, String>>, Box<dyn std::error::Error>> {
    let entries: Vec<&str> = ann_value
        .split(',')
        .filter(|entry| !entry.trim().is_empty())
        .collect();
    if entries.is_empty() {
        return Ok(Vec::new());
    }

    let records = match transcript_handling {
        TranscriptHandling::SplitRows => entries
            .iter()
            .filter_map(
                |annotation| match parse_single_ann_annotation(annotation, ann_field_names) {
                    Ok(parsed) => Some(parsed),
                    Err(e) => {
                        eprintln!("Warning: Failed to parse ANN annotation '{annotation}': {e}");
                        None
                    }
                },
            )
            .collect(),
        TranscriptHandling::MostSevere => {
            vec![find_most_severe_ann_consequence(&entries, ann_field_names)?]
        }
        TranscriptHandling::FirstOnly => {
            let first = match entries.first() {
                Some(entry) => entry,
                None => return Err("No annotations found after filtering".into()),
            };
            vec![parse_single_ann_annotation(first, ann_field_names)?]
        }
    };
    Ok(records)
}
/// Splits one pipe-delimited CSQ entry into a map keyed by `CSQ_<field>`.
///
/// Each declared field name is paired with the value at the same position;
/// missing or blank values are stored as `"."`. Values beyond the declared
/// field list are kept under `CSQ_EXTRA_<n>` (1-based) with trimmed text.
///
/// # Errors
///
/// Fails when `annotation` is empty or whitespace only.
fn parse_single_csq_annotation(
    annotation: &str,
    csq_field_names: &[String],
) -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
    if annotation.trim().is_empty() {
        return Err("Empty CSQ annotation".into());
    }
    let values: Vec<&str> = annotation.split('|').collect();

    let named = csq_field_names
        .iter()
        .enumerate()
        .map(|(index, field_name)| {
            let raw = values.get(index).map_or("", |value| value.trim());
            let shown = if raw.is_empty() { "." } else { raw };
            (
                format!("CSQ_{}", sanitize_field_name(field_name)),
                shown.to_string(),
            )
        });
    // Surplus values are numbered from 1 in the order they appear.
    let extras = values
        .iter()
        .skip(csq_field_names.len())
        .enumerate()
        .map(|(offset, value)| {
            (
                format!("CSQ_EXTRA_{}", offset + 1),
                value.trim().to_string(),
            )
        });

    Ok(named.chain(extras).collect())
}
/// Splits one pipe-delimited ANN entry into a map keyed by `ANN_<field>`.
///
/// Each declared field name is paired with the value at the same position;
/// missing or blank values are stored as `"."`. Values beyond the declared
/// field list are kept under `ANN_EXTRA_<n>` (1-based) with trimmed text.
///
/// # Errors
///
/// Fails when `annotation` is empty or whitespace only.
fn parse_single_ann_annotation(
    annotation: &str,
    ann_field_names: &[String],
) -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
    if annotation.trim().is_empty() {
        return Err("Empty ANN annotation".into());
    }
    let values: Vec<&str> = annotation.split('|').collect();

    let named = ann_field_names
        .iter()
        .enumerate()
        .map(|(index, field_name)| {
            let raw = values.get(index).map_or("", |value| value.trim());
            let shown = if raw.is_empty() { "." } else { raw };
            (
                format!("ANN_{}", sanitize_field_name(field_name)),
                shown.to_string(),
            )
        });
    // Surplus values are numbered from 1 in the order they appear.
    let extras = values
        .iter()
        .skip(ann_field_names.len())
        .enumerate()
        .map(|(offset, value)| {
            (
                format!("ANN_EXTRA_{}", offset + 1),
                value.trim().to_string(),
            )
        });

    Ok(named.chain(extras).collect())
}
/// Picks the ANN entry with the highest impact rating and parses it.
///
/// The impact column is the first field whose lowercased name contains
/// `"impact"` (this also covers names containing `"annotation_impact"`),
/// falling back to index 2 when no field name matches. Entries are ranked
/// with `get_ann_impact_severity`; the first entry with the strictly highest
/// non-zero rank wins. When no entry has a recognised impact, the first entry
/// is used.
///
/// # Errors
///
/// Fails when `annotations` is empty or when the selected entry cannot be parsed.
fn find_most_severe_ann_consequence(
    annotations: &[&str],
    ann_field_names: &[String],
) -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
    if annotations.is_empty() {
        return Err("No annotations provided".into());
    }
    let impact_index = ann_field_names
        .iter()
        .position(|name| name.to_lowercase().contains("impact"))
        .unwrap_or(2);

    // Track (severity, entry); only a strictly higher severity replaces the
    // current pick, so ties keep the earliest entry.
    let mut best: Option<(u8, &str)> = None;
    for &annotation in annotations {
        if let Some(impact) = annotation.split('|').nth(impact_index) {
            let severity = get_ann_impact_severity(impact);
            if severity > best.map_or(0, |(top, _)| top) {
                best = Some((severity, annotation));
            }
        }
    }

    let selected_annotation = best.map_or(annotations[0], |(_, annotation)| annotation);
    parse_single_ann_annotation(selected_annotation, ann_field_names)
}
/// Maps an impact label to a numeric rank.
///
/// The label is trimmed and uppercased before comparison. `HIGH` ranks 4,
/// `MODERATE` 3, `LOW` 2 and `MODIFIER` 1; anything else ranks 0.
pub fn get_ann_impact_severity(impact: &str) -> u8 {
    const RANKED_LABELS: [(&str, u8); 4] =
        [("HIGH", 4), ("MODERATE", 3), ("LOW", 2), ("MODIFIER", 1)];
    let normalized = impact.trim().to_uppercase();
    RANKED_LABELS
        .iter()
        .find(|(label, _)| *label == normalized.as_str())
        .map_or(0, |(_, rank)| *rank)
}
fn parse_remaining_info_fields(
remaining_info: &str,
) -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
let mut info_map = HashMap::new();
if remaining_info.trim().is_empty() {
return Ok(info_map);
}
for pair in remaining_info.split(';') {
let trimmed_pair = pair.trim();
if trimmed_pair.is_empty() {
continue;
}
match trimmed_pair.split_once('=') {
Some((key, value)) => {
let sanitized_key = sanitize_field_name(key.trim());
let sanitized_value = value.trim();
if !sanitized_key.is_empty() {
info_map.insert(format!("INFO_{sanitized_key}"), sanitized_value.to_string());
}
}
None => {
let sanitized_key = sanitize_field_name(trimmed_pair);
if !sanitized_key.is_empty() {
info_map.insert(format!("INFO_{sanitized_key}"), "true".to_string());
}
}
}
}
Ok(info_map)
}
/// Merges the ordinary INFO pairs into every annotation map.
///
/// With no annotation maps, the result is a single-element vector holding
/// `info_fields` itself. Otherwise each annotation map receives a copy of
/// every INFO pair; on a key clash the INFO value wins.
fn combine_annotation_with_info(
    annotation_records: Vec<HashMap<String, String>>,
    info_fields: HashMap<String, String>,
) -> Vec<HashMap<String, String>> {
    if annotation_records.is_empty() {
        return vec![info_fields];
    }
    let mut merged = Vec::with_capacity(annotation_records.len());
    for mut annotation_map in annotation_records {
        annotation_map.extend(
            info_fields
                .iter()
                .map(|(key, value)| (key.clone(), value.clone())),
        );
        merged.push(annotation_map);
    }
    merged
}
/// Turns an arbitrary field name into an identifier-like string.
///
/// Every character that is neither alphanumeric nor `_` is replaced with `_`,
/// then leading and trailing underscores are stripped. The result may be empty.
pub fn sanitize_field_name(field_name: &str) -> String {
    let mut replaced = String::with_capacity(field_name.len());
    for c in field_name.chars() {
        let keep = c == '_' || c.is_alphanumeric();
        replaced.push(if keep { c } else { '_' });
    }
    replaced.trim_matches('_').to_string()
}
/// Picks the CSQ entry carrying the most severe consequence term and parses it.
///
/// The consequence column is the field named exactly `Consequence`, falling
/// back to index 1 when that name is absent. A column value may hold several
/// terms joined by `&`; an entry is ranked by its best-ranked recognised
/// term. Earlier positions in the severity table are more severe. Ties keep
/// the earliest entry, and when no entry has a recognised term the first
/// entry is used.
///
/// # Errors
///
/// Fails when `annotations` is empty or when the selected entry cannot be parsed.
fn find_most_severe_consequence(
    annotations: &[&str],
    csq_field_names: &[String],
) -> std::result::Result<HashMap<String, String>, Box<dyn std::error::Error>> {
    // Kept as a constant so the table is not rebuilt on the heap for every call.
    const SEVERITY_ORDER: &[&str] = &[
        "transcript_ablation",
        "splice_acceptor_variant",
        "splice_donor_variant",
        "stop_gained",
        "frameshift_variant",
        "stop_lost",
        "start_lost",
        "transcript_amplification",
        "inframe_insertion",
        "inframe_deletion",
        "missense_variant",
        "protein_altering_variant",
        "splice_region_variant",
        "incomplete_terminal_codon_variant",
        "start_retained_variant",
        "stop_retained_variant",
        "synonymous_variant",
        "coding_sequence_variant",
        "mature_miRNA_variant",
        "5_prime_UTR_variant",
        "3_prime_UTR_variant",
        "non_coding_transcript_exon_variant",
        "intron_variant",
        "NMD_transcript_variant",
        "non_coding_transcript_variant",
        "upstream_gene_variant",
        "downstream_gene_variant",
        "TFBS_ablation",
        "TFBS_amplification",
        "TF_binding_site_variant",
        "regulatory_region_ablation",
        "regulatory_region_amplification",
        "feature_elongation",
        "regulatory_region_variant",
        "feature_truncation",
        "intergenic_variant",
    ];

    if annotations.is_empty() {
        return Err("No annotations provided".into());
    }
    let consequence_index = csq_field_names
        .iter()
        .position(|name| name == "Consequence")
        .unwrap_or(1);

    let mut most_severe_annotation = annotations[0];
    let mut best_severity = usize::MAX;
    for &annotation in annotations {
        // Best (lowest) table position among this entry's `&`-joined terms.
        let entry_rank = annotation
            .split('|')
            .nth(consequence_index)
            .and_then(|consequence| {
                consequence
                    .split('&')
                    .filter_map(|term| SEVERITY_ORDER.iter().position(|&known| known == term))
                    .min()
            });
        if let Some(rank) = entry_rank {
            if rank < best_severity {
                best_severity = rank;
                most_severe_annotation = annotation;
            }
        }
    }
    parse_single_csq_annotation(most_severe_annotation, csq_field_names)
}
/// Reformats VCF data lines sequentially into flat records plus column headers.
///
/// CSQ and ANN field names are taken from `header`. Lines that fail to parse
/// are reported on stderr with their 1-based index and skipped. Returns the
/// output headers and all successfully parsed records in input order.
pub fn reformat_vcf_data_with_header(
    header: &str,
    column_names: &str,
    data_lines: &[String],
    transcript_handling: TranscriptHandling,
) -> std::result::Result<(Vec<String>, Vec<ReformattedVcfRecord>), Box<dyn std::error::Error>> {
    let csq_field_names = extract_csq_format_from_header(header);
    let ann_field_names = extract_ann_format_from_header(header);
    let column_names_vec: Vec<&str> = column_names.trim_start_matches('#').split('\t').collect();

    let all_records: Vec<ReformattedVcfRecord> = data_lines
        .iter()
        .enumerate()
        .flat_map(|(line_num, line)| {
            let parsed = ReformattedVcfRecord::from_vcf_line(
                line,
                &column_names_vec,
                &csq_field_names,
                &ann_field_names,
                transcript_handling,
            );
            match parsed {
                Ok(records) => records,
                Err(e) => {
                    eprintln!(
                        "Warning: Failed to parse line {}: {} ({})",
                        line_num + 1,
                        e,
                        line
                    );
                    Vec::new()
                }
            }
        })
        .collect();

    let headers = generate_headers_from_records(&all_records, &column_names_vec);
    Ok((headers, all_records))
}
/// Derives the output column headers for a set of records.
///
/// INFO, CSQ and ANN keys can differ from record to record, so the header
/// set is built from the union of keys across all `records`, not from the
/// first record alone; otherwise a key that first appears on a later record
/// would have no column and its values would be dropped on output. Fixed
/// columns and sample columns still come from the first record. Returns an
/// empty vector when `records` is empty.
fn generate_headers_from_records(
    records: &[ReformattedVcfRecord],
    column_names_vec: &[&str],
) -> Vec<String> {
    let (first_record, later_records) = match records.split_first() {
        Some(parts) => parts,
        None => return vec![],
    };
    // Column names past the ninth are treated as sample names.
    let sample_names: Vec<String> = column_names_vec
        .iter()
        .skip(9)
        .map(|s| s.to_string())
        .collect();

    // Build a template record carrying every key seen anywhere. Only the keys
    // matter to the header generator, so placeholder values are fine.
    let mut template = first_record.clone();
    for record in later_records {
        for key in record.info_fields.keys() {
            if !template.info_fields.contains_key(key) {
                template.info_fields.insert(key.clone(), String::new());
            }
        }
    }
    generate_headers_from_record(&template, &sample_names)
}
/// Reformats VCF data lines in parallel into flat records plus column headers.
///
/// Behaves like the sequential variant, but lines are parsed with a parallel
/// iterator. Lines that fail to parse are reported on stderr with their
/// 1-based index and contribute no records.
pub fn reformat_vcf_data_with_header_parallel(
    header: &str,
    column_names: &str,
    data_lines: &[String],
    transcript_handling: TranscriptHandling,
) -> std::result::Result<(Vec<String>, Vec<ReformattedVcfRecord>), Box<dyn std::error::Error>> {
    let csq_field_names = extract_csq_format_from_header(header);
    let ann_field_names = extract_ann_format_from_header(header);
    let column_names_vec: Vec<&str> = column_names.trim_start_matches('#').split('\t').collect();

    let per_line: Vec<Vec<ReformattedVcfRecord>> = data_lines
        .par_iter()
        .enumerate()
        .map(|(line_num, line)| {
            let parsed = ReformattedVcfRecord::from_vcf_line(
                line,
                &column_names_vec,
                &csq_field_names,
                &ann_field_names,
                transcript_handling,
            );
            match parsed {
                Ok(records) => records,
                Err(e) => {
                    eprintln!(
                        "Warning: Failed to parse line {}: {} ({})",
                        line_num + 1,
                        e,
                        line
                    );
                    Vec::new()
                }
            }
        })
        .collect();

    // Flatten sequentially so records stay in input-line order.
    let flattened_records: Vec<ReformattedVcfRecord> = per_line.into_iter().flatten().collect();
    let headers = generate_headers_from_records(&flattened_records, &column_names_vec);
    Ok((headers, flattened_records))
}
fn generate_headers_from_record(
record: &ReformattedVcfRecord,
_sample_names: &[String],
) -> Vec<String> {
let mut headers = vec![
"CHROM".to_string(),
"POS".to_string(),
"ID".to_string(),
"REF".to_string(),
"ALT".to_string(),
"QUAL".to_string(),
"FILTER".to_string(),
];
let mut info_keys: Vec<String> = record
.info_fields
.keys()
.filter(|k| k.starts_with("INFO_"))
.cloned()
.collect();
info_keys.sort();
headers.extend(info_keys);
let mut csq_keys: Vec<String> = record
.info_fields
.keys()
.filter(|k| k.starts_with("CSQ_"))
.cloned()
.collect();
csq_keys.sort();
headers.extend(csq_keys);
let mut ann_keys: Vec<String> = record
.info_fields
.keys()
.filter(|k| k.starts_with("ANN_"))
.cloned()
.collect();
ann_keys.sort();
headers.extend(ann_keys);
if let Some(ref sample_data) = record.format_sample_data {
headers.extend(sample_data.get_headers_for_samples());
}
headers
}
/// Writes records as a tab-separated file, optionally gzip-compressed.
///
/// Parent directories of `filename` are created when missing. With
/// `compress` set, the output is wrapped in a gzip encoder at the default
/// compression level.
///
/// # Errors
///
/// Returns any I/O error from creating directories, creating the file,
/// writing rows, flushing, or finalising the gzip stream.
pub fn write_reformatted_vcf(
    filename: &str,
    headers: &[String],
    records: &[ReformattedVcfRecord],
    compress: bool,
) -> std::io::Result<()> {
    if let Some(parent) = Path::new(filename).parent() {
        create_dir_all(parent)?;
    }
    let file = File::create(filename)?;
    if compress {
        let encoder = GzEncoder::new(file, Compression::default());
        let mut writer = BufWriter::new(encoder);
        write_tsv_content(&mut writer, headers, records)?;
        // `into_inner` flushes the buffer and hands the encoder back, so the
        // gzip stream can be finished explicitly. Leaving that to `Drop`
        // would discard any error from writing the trailer.
        let encoder = writer.into_inner().map_err(std::io::Error::from)?;
        encoder.finish()?;
    } else {
        let mut writer = BufWriter::new(file);
        write_tsv_content(&mut writer, headers, records)?;
        writer.flush()?;
    }
    Ok(())
}
#[allow(clippy::collapsible_else_if)]
fn write_tsv_content<W: Write>(
writer: &mut W,
headers: &[String],
records: &[ReformattedVcfRecord],
) -> std::io::Result<()> {
writeln!(writer, "{}", headers.join("\t"))?;
for record in records {
let mut row = Vec::new();
for header in headers {
let value = match header.as_str() {
"CHROM" => record.chromosome.clone(),
"POS" => record.position.to_string(),
"ID" => record.id.as_ref().unwrap_or(&".".to_string()).clone(),
"REF" => record.reference.clone(),
"ALT" => record.alternate.clone(),
"QUAL" => record
.quality
.map(|q| q.to_string())
.unwrap_or(".".to_string()),
"FILTER" => record.filter.clone(),
_ => {
if header.starts_with("INFO_")
|| header.starts_with("CSQ_")
|| header.starts_with("ANN_")
{
record
.info_fields
.get(header)
.unwrap_or(&".".to_string())
.clone()
} else {
if let Some(ref sample_data) = record.format_sample_data {
let mut found_value = None;
for sample in &sample_data.samples {
for format_key in &sample_data.format_keys {
let expected_header =
format!("{}_{}", sample.sample_name, format_key);
if expected_header == *header {
found_value = sample.format_fields.get(format_key).cloned();
break;
}
}
if found_value.is_some() {
break;
}
}
found_value.unwrap_or(".".to_string())
} else {
".".to_string()
}
}
}
};
row.push(value);
}
writeln!(writer, "{}", row.join("\t"))?;
}
Ok(())
}
/// Reformats VCF data lines without any header information.
///
/// Delegates to `reformat_vcf_data_with_header` with an empty header string
/// and `TranscriptHandling::FirstOnly`.
pub fn _reformat_vcf_data(
column_names: &str,
data_lines: &[String],
) -> std::result::Result<(Vec<String>, Vec<ReformattedVcfRecord>), Box<dyn std::error::Error>> {
reformat_vcf_data_with_header("", column_names, data_lines, TranscriptHandling::FirstOnly)
}
pub fn write_maf_file(
filename: &str,
records: &[MafRecord],
compress: bool,
) -> std::io::Result<()> {
if compress {
let file = std::fs::File::create(filename)?;
let mut encoder = GzEncoder::new(file, Compression::default());
write_maf_content(&mut encoder, records)?;
encoder.finish()?;
} else {
let mut file = std::fs::File::create(filename)?;
write_maf_content(&mut file, records)?;
}
Ok(())
}
/// Writes the MAF header line followed by one line per record.
///
/// The header comes from `MafRecord::get_maf_headers`, joined with tabs; each
/// record contributes the text returned by its `to_tsv_line`.
fn write_maf_content<W: Write>(writer: &mut W, records: &[MafRecord]) -> std::io::Result<()> {
    let header_line = MafRecord::get_maf_headers().join("\t");
    writeln!(writer, "{}", header_line)?;
    records
        .iter()
        .try_for_each(|record| writeln!(writer, "{}", record.to_tsv_line()))
}
/// Reformats VCF data lines chunk by chunk and streams rows to `output_writer`.
///
/// Each chunk is parsed with a parallel iterator and its rows are written
/// immediately. Headers are derived from the first chunk that yields any
/// record, written once, and reused for all later rows. Lines that fail to
/// parse are reported on stderr with their 1-based global index and skipped.
/// Progress messages go to stdout.
///
/// Returns the headers that were written; the vector is empty when no record
/// was produced, which includes empty input.
///
/// # Errors
///
/// Returns any error from writing to `output_writer`.
pub fn reformat_vcf_data_with_header_parallel_chunked(
    header: &str,
    column_names: &str,
    data_lines: &[String],
    transcript_handling: TranscriptHandling,
    output_writer: &mut dyn Write,
) -> std::result::Result<Vec<String>, Box<dyn std::error::Error>> {
    let csq_field_names = extract_csq_format_from_header(header);
    let ann_field_names = extract_ann_format_from_header(header);
    let column_names_vec: Vec<&str> = column_names.trim_start_matches('#').split('\t').collect();
    let chunk_size = if data_lines.len() > 1_000_000 {
        50_000
    } else if data_lines.len() > 100_000 {
        100_000
    } else {
        // `chunks` panics on a zero chunk size, so empty input must not
        // produce 0 here.
        data_lines.len().max(1)
    };
    let mut headers_generated = false;
    let mut output_headers: Vec<String> = Vec::new();
    let mut total_processed = 0usize;
    println!(
        "🔄 Processing {} lines in chunks of {}",
        data_lines.len(),
        chunk_size
    );
    for (chunk_idx, chunk) in data_lines.chunks(chunk_size).enumerate() {
        let chunk_results: Vec<Vec<ReformattedVcfRecord>> = chunk
            .par_iter()
            .enumerate()
            .map(|(line_num, line)| {
                ReformattedVcfRecord::from_vcf_line(
                    line,
                    &column_names_vec,
                    &csq_field_names,
                    &ann_field_names,
                    transcript_handling,
                )
                .unwrap_or_else(|e| {
                    // Turn the chunk-local index into a 1-based index over all lines.
                    let global_line_num = chunk_idx * chunk_size + line_num + 1;
                    eprintln!(
                        "Warning: Failed to parse line {}: {} ({})",
                        global_line_num, e, line
                    );
                    Vec::new()
                })
            })
            .collect();
        let chunk_records: Vec<ReformattedVcfRecord> =
            chunk_results.into_iter().flatten().collect();
        // The header row is emitted once, from the first chunk with records.
        if !headers_generated && !chunk_records.is_empty() {
            output_headers = generate_headers_from_records(&chunk_records, &column_names_vec);
            writeln!(output_writer, "{}", output_headers.join("\t"))?;
            headers_generated = true;
            println!("📋 Generated {} column headers", output_headers.len());
        }
        for record in chunk_records {
            let values = extract_values_from_record(&record, &output_headers);
            writeln!(output_writer, "{}", values.join("\t"))?;
        }
        total_processed += chunk.len();
        if total_processed % 100_000 == 0 {
            println!(" 📊 Streamed {} lines so far...", total_processed);
        }
    }
    println!(
        "✅ Streaming complete! Processed {} total lines",
        total_processed
    );
    Ok(output_headers)
}
/// Produces one cell per header for a record, in header order.
///
/// The seven fixed column names map to the record's own fields. Any other
/// header is looked up in `info_fields` first and then in the sample data;
/// a header found in neither yields `"."`.
fn extract_values_from_record(record: &ReformattedVcfRecord, headers: &[String]) -> Vec<String> {
    let mut cells = Vec::with_capacity(headers.len());
    for header in headers {
        let cell = match header.as_str() {
            "CHROM" => record.chromosome.clone(),
            "POS" => record.position.to_string(),
            "ID" => match &record.id {
                Some(id) => id.clone(),
                None => ".".to_string(),
            },
            "REF" => record.reference.clone(),
            "ALT" => record.alternate.clone(),
            "QUAL" => match record.quality {
                Some(q) => q.to_string(),
                None => ".".to_string(),
            },
            "FILTER" => record.filter.clone(),
            other => match (record.info_fields.get(other), &record.format_sample_data) {
                (Some(value), _) => value.clone(),
                (None, Some(sample_data)) => extract_sample_value_for_header(sample_data, other),
                (None, None) => ".".to_string(),
            },
        };
        cells.push(cell);
    }
    cells
}
/// Finds the sample value addressed by a `<sample_name>_<format_key>` header.
///
/// Samples and format keys are scanned in order. The first combination whose
/// combined name equals `header` decides the result: its stored value, or
/// `"."` when the sample has no value for that key. `"."` is also returned
/// when nothing matches.
fn extract_sample_value_for_header(sample_data: &ParsedFormatSample, header: &str) -> String {
    sample_data
        .samples
        .iter()
        .flat_map(|sample| {
            sample_data
                .format_keys
                .iter()
                .map(move |format_key| (sample, format_key))
        })
        .find(|(sample, format_key)| format!("{}_{}", sample.sample_name, format_key) == header)
        .and_then(|(sample, format_key)| sample.format_fields.get(format_key).cloned())
        .unwrap_or_else(|| ".".to_string())
}