Skip to main content

rustalign_cli/
inspect.rs

1//! rustalign-inspect - Inspect and query FM-index
2
3use anyhow::Result;
4use clap::{Parser, Subcommand};
5
6/// RustAlign index inspection options
7#[derive(Debug, Parser)]
8pub struct InspectOptions {
9    /// Index basename
10    #[clap(required = true)]
11    pub index: String,
12
13    /// Inspection subcommand
14    #[clap(subcommand)]
15    pub command: InspectCommand,
16}
17
18/// Inspection subcommands
19#[derive(Debug, Subcommand)]
20pub enum InspectCommand {
21    /// Print index summary
22    Summary,
23
24    /// Extract reference sequences
25    Extract,
26
27    /// Print index statistics
28    Stats,
29
30    /// Test exact_match functionality
31    Test,
32
33    /// Debug position resolution
34    DebugPos {
35        /// Genome offset to resolve
36        #[clap(long)]
37        offset: Option<u64>,
38
39        /// Test specific known positions
40        #[clap(long)]
41        test: bool,
42    },
43}
44
45/// Run the rustalign-inspect tool
46pub fn run_with_options(opts: InspectOptions) -> Result<()> {
47    run_with_opts(opts)
48}
49
50/// Run the rustalign-inspect tool (legacy - parses args)
51pub fn run() -> Result<()> {
52    let opts = InspectOptions::parse();
53    run_with_options(opts)
54}
55
56#[allow(dead_code)]
57fn main() -> Result<()> {
58    run()
59}
60
61fn run_with_opts(opts: InspectOptions) -> Result<()> {
62    let index = rustalign_fmindex::Ebwt::load(&opts.index)?;
63    let params = index.params();
64
65    match opts.command {
66        InspectCommand::Summary => {
67            println!("Index: {}", opts.index);
68            println!("Reference length: {}", params.len);
69            println!("BWT length: {}", params.bwt_len);
70            println!("Number of sides: {}", params.num_sides);
71            println!("Side size: {} bytes", params.side_sz);
72            println!("Total size: {} bytes", params.total_size());
73        }
74        InspectCommand::Extract => {
75            println!("Extract not yet implemented");
76        }
77        InspectCommand::Stats => {
78            println!("EbwtParams:");
79            println!("  len: {}", params.len);
80            println!("  bwt_len: {}", params.bwt_len);
81            println!("  bwt_sz: {}", params.bwt_sz);
82            println!("  line_sz: {}", params.line_sz);
83            println!("  side_sz: {}", params.side_sz);
84            println!("  side_bwt_sz: {}", params.side_bwt_sz);
85            println!("  side_bwt_len: {}", params.side_bwt_len);
86            println!("  num_sides: {}", params.num_sides);
87            println!("  off_rate: {}", params.off_rate);
88            println!("  ftab_chars: {}", params.ftab_chars);
89            println!("  ftab_len: {}", params.ftab_len);
90            println!("  ebwt_tot_len: {}", params.ebwt_tot_len);
91        }
92        InspectCommand::Test => {
93            use rustalign_common::Nuc;
94            use rustalign_fmindex::FmIndex;
95
96            println!("Testing exact_match...");
97
98            // Try a pattern from the first read: GATTGCTCGC (10 chars)
99            let pattern: Vec<Nuc> = "GATTGCTCGC"
100                .chars()
101                .map(|c| Nuc::from_ascii(c as u8).unwrap())
102                .collect();
103            println!("\nSearching for: GATTGCTCGC");
104
105            match index.exact_match(&pattern) {
106                Ok(positions) => {
107                    println!("Found {} matches", positions.len());
108                    if !positions.is_empty() {
109                        println!(
110                            "First 10 positions: {:?}",
111                            &positions[..positions.len().min(10)]
112                        );
113                    }
114                }
115                Err(e) => {
116                    println!("Error: {:?}", e);
117                }
118            }
119
120            // Try 10 A's
121            let pattern: Vec<Nuc> = vec![Nuc::A; 10];
122            println!("\nSearching for: AAAAAAAAAA");
123
124            match index.exact_match(&pattern) {
125                Ok(positions) => {
126                    println!("Found {} matches", positions.len());
127                }
128                Err(e) => {
129                    println!("Error: {:?}", e);
130                }
131            }
132        }
133        InspectCommand::DebugPos { offset, test } => {
134            println!("=== Debug Position Resolution ===");
135            println!();
136
137            // Print chromosome info
138            println!("Chromosomes:");
139            for (i, name) in index.refnames().iter().enumerate() {
140                let len = index.plen().get(i).copied().unwrap_or(0);
141                println!("  [{}] {} ({} bp)", i, name, len);
142            }
143            println!();
144
145            // Print rstarts array (first 10 entries)
146            println!("rstarts array (first 10 fragments):");
147            let rstarts = index.rstarts();
148            for i in 0..10.min(index.n_frag() as usize) {
149                let frag_start = rstarts[i * 3];
150                let text_id = rstarts[i * 3 + 1];
151                let frag_off = rstarts[i * 3 + 2];
152                println!(
153                    "  [{}] frag_start={}, text_id={}, frag_off={}",
154                    i, frag_start, text_id, frag_off
155                );
156            }
157            println!("  ... (showing first 10 of {} fragments)", index.n_frag());
158            println!();
159
160            // Also print the last few fragments
161            let n_frag = index.n_frag() as usize;
162            if n_frag > 5 {
163                println!("rstarts array (last 5 fragments):");
164                for i in (n_frag - 5)..n_frag {
165                    let frag_start = rstarts[i * 3];
166                    let text_id = rstarts[i * 3 + 1];
167                    let frag_off = rstarts[i * 3 + 2];
168                    println!(
169                        "  [{}] frag_start={}, text_id={}, frag_off={}",
170                        i, frag_start, text_id, frag_off
171                    );
172                }
173                println!();
174            }
175
176            // Test specific offset if provided
177            if let Some(off) = offset {
178                println!("Resolving offset {}:", off);
179                match index.joined_to_text_off(100, off, true) {
180                    Ok((chr_idx, text_off, chr_len)) => {
181                        println!(
182                            "  Result: chr_idx={}, text_off={}, chr_len={}",
183                            chr_idx, text_off, chr_len
184                        );
185                        if (chr_idx as usize) < index.refnames().len() {
186                            println!("  Chromosome name: {}", index.refnames()[chr_idx as usize]);
187                        }
188                    }
189                    Err(e) => {
190                        println!("  Error: {:?}", e);
191                    }
192                }
193            }
194
195            // Run tests if requested
196            if test {
197                println!("\n=== Testing known positions ===");
198                println!("Note: The 'off' parameter should be the BWT offset (without N's),");
199                println!("not the genome offset (with N's).");
200                println!();
201
202                // Test cases from critical errors:
203                // For BWT offsets, we need to subtract the cumulative N count
204                // Let's test with the actual BWT length vs genome length
205                let bwt_len = index.params().len;
206                println!("BWT length (len parameter): {}", bwt_len);
207
208                // Test specific BWT row resolutions
209                // Test row 0 (should map to start of first chromosome)
210                println!("\nTesting BWT row 0:");
211                match index.get_offset(0) {
212                    Ok(off) => {
213                        println!("  get_offset(0) = {}", off);
214                        match index.joined_to_text_off(100, off, true) {
215                            Ok((chr_idx, text_off, _chr_len)) => {
216                                let chr_name = if (chr_idx as usize) < index.refnames().len() {
217                                    index.refnames()[chr_idx as usize].clone()
218                                } else {
219                                    format!("?{}", chr_idx)
220                                };
221                                println!(
222                                    "  joined_to_text_off({}, {}) = {}@{}",
223                                    100, off, chr_name, text_off
224                                );
225                            }
226                            Err(e) => println!("  Error in joined_to_text_off: {:?}", e),
227                        }
228                    }
229                    Err(e) => println!("  Error in get_offset: {:?}", e),
230                }
231
232                // Test the zOff position (should return offset 0)
233                println!("\nzOff = {}", index.z_off());
234                match index.get_offset(index.z_off() as u64) {
235                    Ok(off) => println!("  get_offset(zOff) = {} (should be 0)", off),
236                    Err(e) => println!("  Error: {:?}", e),
237                }
238
239                // Find which fragment contains a specific BWT offset
240                // For Chr1 position 11789656, the BWT offset should be the same (no N's before it)
241                println!("\n=== Binary search trace for offset 11789656 ===");
242                let target_off = 11789656u64;
243                let rstarts = index.rstarts();
244                let n_frag = index.n_frag() as usize;
245
246                let mut top = 0usize;
247                let mut bot = n_frag;
248                let mut steps = 0;
249
250                while top < bot && steps < 20 {
251                    let mid = top + ((bot - top) >> 1);
252                    let lower = rstarts[mid * 3] as u64;
253                    let upper = if mid + 1 >= n_frag {
254                        index.params().len
255                    } else {
256                        rstarts[(mid + 1) * 3] as u64
257                    };
258
259                    steps += 1;
260                    if steps <= 10 {
261                        println!(
262                            "  step {}: mid={}, lower={}, upper={}, target={}",
263                            steps, mid, lower, upper, target_off
264                        );
265                    }
266
267                    if lower <= target_off && upper > target_off {
268                        let tidx = rstarts[mid * 3 + 1];
269                        let frag_off_base = rstarts[mid * 3 + 2];
270                        let frag_off = target_off - lower;
271                        let text_off = frag_off + frag_off_base as u64;
272
273                        println!("  FOUND: fragment {}", mid);
274                        println!("    tidx (chr_idx) = {}", tidx);
275                        println!("    frag_off_base = {}", frag_off_base);
276                        println!("    frag_off = {} - {} = {}", target_off, lower, frag_off);
277                        println!(
278                            "    text_off = {} + {} = {}",
279                            frag_off, frag_off_base, text_off
280                        );
281                        break;
282                    } else if lower <= target_off {
283                        top = mid + 1;
284                    } else {
285                        bot = mid;
286                    }
287                }
288            }
289        }
290    }
291
292    Ok(())
293}