1use anyhow::Result;
4use clap::{Parser, Subcommand};
5
6#[derive(Debug, Parser)]
8pub struct InspectOptions {
9 #[clap(required = true)]
11 pub index: String,
12
13 #[clap(subcommand)]
15 pub command: InspectCommand,
16}
17
18#[derive(Debug, Subcommand)]
20pub enum InspectCommand {
21 Summary,
23
24 Extract,
26
27 Stats,
29
30 Test,
32
33 DebugPos {
35 #[clap(long)]
37 offset: Option<u64>,
38
39 #[clap(long)]
41 test: bool,
42 },
43}
44
45pub fn run_with_options(opts: InspectOptions) -> Result<()> {
47 run_with_opts(opts)
48}
49
50pub fn run() -> Result<()> {
52 let opts = InspectOptions::parse();
53 run_with_options(opts)
54}
55
56#[allow(dead_code)]
57fn main() -> Result<()> {
58 run()
59}
60
61fn run_with_opts(opts: InspectOptions) -> Result<()> {
62 let index = rustalign_fmindex::Ebwt::load(&opts.index)?;
63 let params = index.params();
64
65 match opts.command {
66 InspectCommand::Summary => {
67 println!("Index: {}", opts.index);
68 println!("Reference length: {}", params.len);
69 println!("BWT length: {}", params.bwt_len);
70 println!("Number of sides: {}", params.num_sides);
71 println!("Side size: {} bytes", params.side_sz);
72 println!("Total size: {} bytes", params.total_size());
73 }
74 InspectCommand::Extract => {
75 println!("Extract not yet implemented");
76 }
77 InspectCommand::Stats => {
78 println!("EbwtParams:");
79 println!(" len: {}", params.len);
80 println!(" bwt_len: {}", params.bwt_len);
81 println!(" bwt_sz: {}", params.bwt_sz);
82 println!(" line_sz: {}", params.line_sz);
83 println!(" side_sz: {}", params.side_sz);
84 println!(" side_bwt_sz: {}", params.side_bwt_sz);
85 println!(" side_bwt_len: {}", params.side_bwt_len);
86 println!(" num_sides: {}", params.num_sides);
87 println!(" off_rate: {}", params.off_rate);
88 println!(" ftab_chars: {}", params.ftab_chars);
89 println!(" ftab_len: {}", params.ftab_len);
90 println!(" ebwt_tot_len: {}", params.ebwt_tot_len);
91 }
92 InspectCommand::Test => {
93 use rustalign_common::Nuc;
94 use rustalign_fmindex::FmIndex;
95
96 println!("Testing exact_match...");
97
98 let pattern: Vec<Nuc> = "GATTGCTCGC"
100 .chars()
101 .map(|c| Nuc::from_ascii(c as u8).unwrap())
102 .collect();
103 println!("\nSearching for: GATTGCTCGC");
104
105 match index.exact_match(&pattern) {
106 Ok(positions) => {
107 println!("Found {} matches", positions.len());
108 if !positions.is_empty() {
109 println!(
110 "First 10 positions: {:?}",
111 &positions[..positions.len().min(10)]
112 );
113 }
114 }
115 Err(e) => {
116 println!("Error: {:?}", e);
117 }
118 }
119
120 let pattern: Vec<Nuc> = vec![Nuc::A; 10];
122 println!("\nSearching for: AAAAAAAAAA");
123
124 match index.exact_match(&pattern) {
125 Ok(positions) => {
126 println!("Found {} matches", positions.len());
127 }
128 Err(e) => {
129 println!("Error: {:?}", e);
130 }
131 }
132 }
133 InspectCommand::DebugPos { offset, test } => {
134 println!("=== Debug Position Resolution ===");
135 println!();
136
137 println!("Chromosomes:");
139 for (i, name) in index.refnames().iter().enumerate() {
140 let len = index.plen().get(i).copied().unwrap_or(0);
141 println!(" [{}] {} ({} bp)", i, name, len);
142 }
143 println!();
144
145 println!("rstarts array (first 10 fragments):");
147 let rstarts = index.rstarts();
148 for i in 0..10.min(index.n_frag() as usize) {
149 let frag_start = rstarts[i * 3];
150 let text_id = rstarts[i * 3 + 1];
151 let frag_off = rstarts[i * 3 + 2];
152 println!(
153 " [{}] frag_start={}, text_id={}, frag_off={}",
154 i, frag_start, text_id, frag_off
155 );
156 }
157 println!(" ... (showing first 10 of {} fragments)", index.n_frag());
158 println!();
159
160 let n_frag = index.n_frag() as usize;
162 if n_frag > 5 {
163 println!("rstarts array (last 5 fragments):");
164 for i in (n_frag - 5)..n_frag {
165 let frag_start = rstarts[i * 3];
166 let text_id = rstarts[i * 3 + 1];
167 let frag_off = rstarts[i * 3 + 2];
168 println!(
169 " [{}] frag_start={}, text_id={}, frag_off={}",
170 i, frag_start, text_id, frag_off
171 );
172 }
173 println!();
174 }
175
176 if let Some(off) = offset {
178 println!("Resolving offset {}:", off);
179 match index.joined_to_text_off(100, off, true) {
180 Ok((chr_idx, text_off, chr_len)) => {
181 println!(
182 " Result: chr_idx={}, text_off={}, chr_len={}",
183 chr_idx, text_off, chr_len
184 );
185 if (chr_idx as usize) < index.refnames().len() {
186 println!(" Chromosome name: {}", index.refnames()[chr_idx as usize]);
187 }
188 }
189 Err(e) => {
190 println!(" Error: {:?}", e);
191 }
192 }
193 }
194
195 if test {
197 println!("\n=== Testing known positions ===");
198 println!("Note: The 'off' parameter should be the BWT offset (without N's),");
199 println!("not the genome offset (with N's).");
200 println!();
201
202 let bwt_len = index.params().len;
206 println!("BWT length (len parameter): {}", bwt_len);
207
208 println!("\nTesting BWT row 0:");
211 match index.get_offset(0) {
212 Ok(off) => {
213 println!(" get_offset(0) = {}", off);
214 match index.joined_to_text_off(100, off, true) {
215 Ok((chr_idx, text_off, _chr_len)) => {
216 let chr_name = if (chr_idx as usize) < index.refnames().len() {
217 index.refnames()[chr_idx as usize].clone()
218 } else {
219 format!("?{}", chr_idx)
220 };
221 println!(
222 " joined_to_text_off({}, {}) = {}@{}",
223 100, off, chr_name, text_off
224 );
225 }
226 Err(e) => println!(" Error in joined_to_text_off: {:?}", e),
227 }
228 }
229 Err(e) => println!(" Error in get_offset: {:?}", e),
230 }
231
232 println!("\nzOff = {}", index.z_off());
234 match index.get_offset(index.z_off() as u64) {
235 Ok(off) => println!(" get_offset(zOff) = {} (should be 0)", off),
236 Err(e) => println!(" Error: {:?}", e),
237 }
238
239 println!("\n=== Binary search trace for offset 11789656 ===");
242 let target_off = 11789656u64;
243 let rstarts = index.rstarts();
244 let n_frag = index.n_frag() as usize;
245
246 let mut top = 0usize;
247 let mut bot = n_frag;
248 let mut steps = 0;
249
250 while top < bot && steps < 20 {
251 let mid = top + ((bot - top) >> 1);
252 let lower = rstarts[mid * 3] as u64;
253 let upper = if mid + 1 >= n_frag {
254 index.params().len
255 } else {
256 rstarts[(mid + 1) * 3] as u64
257 };
258
259 steps += 1;
260 if steps <= 10 {
261 println!(
262 " step {}: mid={}, lower={}, upper={}, target={}",
263 steps, mid, lower, upper, target_off
264 );
265 }
266
267 if lower <= target_off && upper > target_off {
268 let tidx = rstarts[mid * 3 + 1];
269 let frag_off_base = rstarts[mid * 3 + 2];
270 let frag_off = target_off - lower;
271 let text_off = frag_off + frag_off_base as u64;
272
273 println!(" FOUND: fragment {}", mid);
274 println!(" tidx (chr_idx) = {}", tidx);
275 println!(" frag_off_base = {}", frag_off_base);
276 println!(" frag_off = {} - {} = {}", target_off, lower, frag_off);
277 println!(
278 " text_off = {} + {} = {}",
279 frag_off, frag_off_base, text_off
280 );
281 break;
282 } else if lower <= target_off {
283 top = mid + 1;
284 } else {
285 bot = mid;
286 }
287 }
288 }
289 }
290 }
291
292 Ok(())
293}