1pub mod budget;
2pub mod classify;
3pub mod directed_kmer;
4pub mod dna;
5pub mod kmer;
6pub mod minimizer;
7pub mod mphf;
8pub mod output;
9pub mod params;
10pub mod pipeline;
11pub mod state;
12pub mod state_vector;
13pub mod superkmer;
14
15use std::path::PathBuf;
16
17use crate::kmer::{Kmer, KmerBits};
18use crate::mphf::RadixSortDedup;
19use crate::output::UnipathsMeta;
20use crate::params::Params;
21use crate::pipeline::run_pipeline;
22
/// Where the input sequence files for a build come from.
///
/// Every variant is expanded into a flat list of file paths before the
/// pipeline runs.
#[derive(Debug, Clone)]
pub enum CfInput {
    /// An explicit list of input file paths, used as given.
    Files(Vec<PathBuf>),
    /// A text file listing one input path per line; blank lines are skipped.
    ListFile(PathBuf),
    /// A directory whose regular files with a recognised sequence-file
    /// extension are used as inputs (the directory is not searched recursively).
    Directory(PathBuf),
}
32
/// Summary of a finished build: where the outputs are and basic statistics.
#[derive(Debug, Clone)]
pub struct CfBuildResult {
    /// Path of the segment output file, as derived from the build parameters.
    pub seg_file: PathBuf,
    /// Path of the sequence output file, as derived from the build parameters.
    pub seq_file: PathBuf,
    /// Path of the JSON output file, as derived from the build parameters.
    pub json_file: PathBuf,
    /// Number of k-mers reported by the pipeline metadata.
    pub vertex_count: u64,
    /// Number of unipaths (unitigs) reported by the pipeline metadata.
    pub unitig_count: u64,
    /// Maximum unitig length reported by the pipeline metadata.
    pub max_unitig_len: usize,
    /// Minimum unitig length reported by the pipeline metadata; `0` when the
    /// pipeline left the minimum at `usize::MAX`.
    pub min_unitig_len: usize,
    /// Sum of unitig lengths reported by the pipeline metadata.
    pub sum_unitig_len: u64,
    /// Second value returned by the pipeline.
    /// NOTE(review): presumably `(name, length)` pairs for input sequences
    /// too short to contribute a k-mer; confirm against the pipeline module.
    pub short_seqs: Vec<(String, usize)>,
}
54
55#[bon::builder]
71pub fn cf_build(
72 input: CfInput,
73 output_prefix: PathBuf,
74 #[builder(default = 31)] k: usize,
75 #[builder(default = 1)] threads: usize,
76 work_dir: Option<PathBuf>,
77 #[builder(default = 128)] num_bins: usize,
78 #[builder(default = 4.0)] memory_budget_gb: f64,
79) -> anyhow::Result<CfBuildResult> {
80 let input_files = resolve_input_files(&input)?;
81
82 let params = Params::from_resolved(
83 input_files,
84 k,
85 threads,
86 output_prefix,
87 3, work_dir,
89 true, true, true, num_bins,
93 memory_budget_gb,
94 )?;
95
96 let pool = rayon::ThreadPoolBuilder::new()
97 .num_threads(threads)
98 .build()?;
99
100 pool.install(|| dispatch_k!(k, run_and_collect, ¶ms))
101}
102
103fn resolve_input_files(input: &CfInput) -> anyhow::Result<Vec<PathBuf>> {
105 let mut files = Vec::new();
106 match input {
107 CfInput::Files(paths) => {
108 files.extend(paths.iter().cloned());
109 }
110 CfInput::ListFile(list_path) => {
111 let content = std::fs::read_to_string(list_path)?;
112 for line in content.lines() {
113 let line = line.trim();
114 if !line.is_empty() {
115 files.push(PathBuf::from(line));
116 }
117 }
118 }
119 CfInput::Directory(dir) => {
120 for entry in std::fs::read_dir(dir)? {
121 let entry = entry?;
122 let path = entry.path();
123 if path.is_file() {
124 let ext = path
125 .extension()
126 .and_then(|e| e.to_str())
127 .unwrap_or("");
128 if matches!(ext, "fa" | "fasta" | "fna" | "gz" | "fq" | "fastq") {
129 files.push(path);
130 }
131 }
132 }
133 }
134 }
135 anyhow::ensure!(!files.is_empty(), "No input files found");
136 Ok(files)
137}
138
139fn run_and_collect<const K: usize>(params: &Params) -> anyhow::Result<CfBuildResult>
141where
142 Kmer<K>: KmerBits,
143 <Kmer<K> as KmerBits>::Storage: RadixSortDedup,
144{
145 let (meta, short_seqs) = run_pipeline::<K>(params)?;
146 Ok(build_result(params, &meta, short_seqs))
147}
148
149fn build_result(params: &Params, meta: &UnipathsMeta, short_seqs: Vec<(String, usize)>) -> CfBuildResult {
151 CfBuildResult {
152 seg_file: params.segment_file_path(),
153 seq_file: params.sequence_file_path(),
154 json_file: params.json_file_path(),
155 vertex_count: meta.kmer_count,
156 unitig_count: meta.unipath_count,
157 max_unitig_len: meta.max_len,
158 min_unitig_len: if meta.min_len == usize::MAX { 0 } else { meta.min_len },
159 sum_unitig_len: meta.sum_len,
160 short_seqs,
161 }
162}