fastx 0.6.1

FastX reads Fasta and FastQ files with little overhead.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
//! Indexed FASTA/FASTQ reader for random access by sequence ID.
//!
//! This module provides `IndexedFastXReader` which enables efficient random access
//! to bgzip-compressed FASTA files using .fai and .gzi indexes.

use crate::bgzf::BgzfReader;
use crate::fai::{FaiEntry, FaiIndex};
use crate::gzi::GziIndex;
use crate::FastX::{FastARecord, FastXRead};
use std::fs::File;
use std::io::{self, Read, Seek};
use std::path::Path;

/// Random-access reader that looks sequences up by ID through companion indexes.
///
/// Two indexes cooperate: the `.fai` index holds the per-sequence metadata used to
/// locate a record, and the `.gzi` index (owned by the wrapped BGZF reader) is used
/// for seeking inside the compressed stream. Together they allow a single sequence
/// to be fetched without reading the whole file.
///
/// # Type Parameters
///
/// * `R` - The underlying byte source; it must implement both `Read` and `Seek`
///
/// # Example
///
/// ```no_run
/// use fastx::indexed::IndexedFastXReader;
/// use fastx::FastX::FastXRead;
/// use std::path::Path;
///
/// let mut reader = IndexedFastXReader::from_path(Path::new("data.fasta.gz")).unwrap();
///
/// // Fetch a specific sequence by ID
/// if let Ok(record) = reader.fetch("chr1") {
///     println!("{}: {} bp", record.id(), record.seq_len());
/// }
/// ```
///
/// # URL Support
///
/// When the crate is built with the `url` feature, data and indexes can also be
/// read from HTTP/HTTPS URLs:
///
/// ```no_run,ignore
/// use fastx::indexed::IndexedFastXReader;
///
/// let mut reader = IndexedFastXReader::from_url(
///     "https://example.com/data.fasta.gz",
///     "https://example.com/data.fasta.gz.fai",
///     "https://example.com/data.fasta.gz.gzi"
/// ).unwrap();
/// ```
pub struct IndexedFastXReader<R>
where
    R: Read + Seek,
{
    /// Decompressing BGZF reader wrapped around the byte source
    reader: BgzfReader<R>,
    /// Parsed `.fai` index used to look sequences up by ID
    fai_index: FaiIndex,
}

impl<R: Read + Seek> IndexedFastXReader<R>
{
    /// Create a new indexed reader from a BGZF reader and a FASTA index.
    ///
    /// # Arguments
    ///
    /// * `reader` - A BGZF reader (which may or may not have its own GZI index)
    /// * `fai_index` - A parsed FASTA index
    pub fn new(reader: BgzfReader<R>, fai_index: FaiIndex) -> Self
    {
        Self { reader, fai_index }
    }
}

/// Convenience alias for an [`IndexedFastXReader`] whose byte source is a local [`File`].
pub type LocalIndexedFastXReader = IndexedFastXReader<File>;

impl IndexedFastXReader<File>
{
    /// Open an indexed FASTA file from a local path.
    ///
    /// This looks for companion index files (.fai and .gzi) alongside the
    /// specified file, loads them, and opens the data file itself.
    ///
    /// # Arguments
    ///
    /// * `path` - Path to the FASTA file (must currently end in `.gz`)
    ///
    /// # Returns
    ///
    /// * `Ok(reader)` - The indexed reader ready for use
    /// * `Err(io::Error)` - If files cannot be opened or indexes are missing
    ///
    /// # Errors
    ///
    /// * `NotFound` - no `.fai` index was found, or (for a `.gz` file) no `.gzi`
    ///   index was found. The message lists the exact locations that were probed.
    /// * `Unsupported` - `path` does not have a `gz` extension; uncompressed input
    ///   is not handled yet.
    /// * Any error produced while loading an index or opening the data file.
    ///
    /// # Index Files
    ///
    /// For a file like `data.fasta.gz`:
    /// - `data.fasta.gz.fai` or `data.fasta.fai` - Required FASTA index
    /// - `data.fasta.gz.gzi` or `data.fasta.gzi` - Required gzip index for compressed files
    ///
    /// # Example
    ///
    /// ```no_run
    /// use fastx::indexed::IndexedFastXReader;
    /// use std::path::Path;
    ///
    /// let mut reader = IndexedFastXReader::from_path(Path::new("data.fasta.gz")).unwrap();
    /// ```
    pub fn from_path(path: &Path) -> io::Result<Self>
    {
        // `find_index_file` probes `<path>.<ext>` first and then `<path without its
        // last extension>.<ext>`; the error messages below name those same two
        // locations so the hint is accurate for every input.
        let stem = path.with_extension("");

        let fai_path = find_index_file(path, "fai").ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::NotFound,
                format!(
                    "FAI index not found for {} (expected {}.fai or {}.fai)",
                    path.display(),
                    path.display(),
                    stem.display(),
                ),
            )
        })?;

        let fai_index = FaiIndex::from_path(&fai_path)?;

        // Compression is detected purely from the file extension.
        let is_gzip = path.extension().map_or(false, |e| e == "gz");

        // Opened before the compression check so that a missing data file is
        // reported as such rather than as "unsupported".
        let file = File::open(path)?;

        if !is_gzip
        {
            return Err(io::Error::new(
                io::ErrorKind::Unsupported,
                "Uncompressed files not yet supported, please use bgzip-compressed files",
            ));
        }

        let gzi_path = find_index_file(path, "gzi").ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::NotFound,
                format!(
                    "GZI index not found for compressed file {} (expected {}.gzi or {}.gzi)",
                    path.display(),
                    path.display(),
                    stem.display(),
                ),
            )
        })?;

        let gzi_index = GziIndex::from_path(&gzi_path)?;
        let reader = BgzfReader::with_index(file, gzi_index)?;

        Ok(Self { reader, fai_index })
    }

    /// Build an indexed reader whose data and indexes live behind HTTP/HTTPS URLs.
    ///
    /// Only available when the `url` feature is enabled. Both index files are
    /// downloaded and parsed up front (FAI first, then GZI); the data URL is only
    /// handed to the remote reader afterwards.
    ///
    /// # Arguments
    ///
    /// * `data_url` - URL to the FASTA data file (.fasta.gz)
    /// * `fai_url` - URL to the .fai index file
    /// * `gzi_url` - URL to the .gzi index file
    ///
    /// # Returns
    ///
    /// * `Ok(reader)` - The indexed reader ready for use
    /// * `Err(io::Error)` - If URLs are invalid or requests fail
    ///
    /// # Example
    ///
    /// ```no_run,ignore
    /// use fastx::indexed::IndexedFastXReader;
    ///
    /// let mut reader = IndexedFastXReader::from_url(
    ///     "https://example.com/data.fasta.gz",
    ///     "https://example.com/data.fasta.gz.fai",
    ///     "https://example.com/data.fasta.gz.gzi"
    /// ).unwrap();
    ///
    /// let record = reader.fetch("chr1").unwrap();
    /// println!("{}: {} bp", record.id(), record.seq_len());
    /// ```
    #[cfg(feature = "url")]
    pub fn from_url(
        data_url: impl Into<String>,
        fai_url: impl Into<String>,
        gzi_url: impl Into<String>,
    ) -> io::Result<IndexedFastXReader<crate::remote::RemoteReader>>
    {
        // Sequence metadata: download, then parse.
        let fai_location: String = fai_url.into();
        let fai_index =
            fetch_url(&fai_location).and_then(|bytes| parse_fai_from_bytes(&bytes))?;

        // Block offsets for seeking in the compressed stream: download, then parse.
        let gzi_location: String = gzi_url.into();
        let gzi_index =
            fetch_url(&gzi_location).and_then(|bytes| parse_gzi_from_bytes(&bytes))?;

        // Wrap the remote byte source in a BGZF reader that knows the block offsets.
        let remote = crate::remote::RemoteReader::new(data_url)?;
        let reader = BgzfReader::with_index(remote, gzi_index)?;

        Ok(IndexedFastXReader { reader, fai_index })
    }
}

impl<R: Read + Seek> IndexedFastXReader<R>
{
    /// Look a sequence up in the index by ID and read its full record.
    ///
    /// # Arguments
    ///
    /// * `seq_id` - The sequence identifier (e.g., "chr1", "gene123")
    ///
    /// # Returns
    ///
    /// * `Ok(FastARecord)` - The fetched sequence record
    /// * `Err(io::Error)` - `NotFound` if the ID is not in the index, otherwise
    ///   whatever error reading the record produced
    ///
    /// # Example
    ///
    /// ```no_run
    /// use fastx::indexed::IndexedFastXReader;
    /// use fastx::FastX::FastXRead;
    /// use std::path::Path;
    ///
    /// let mut reader = IndexedFastXReader::from_path(Path::new("data.fasta.gz")).unwrap();
    ///
    /// match reader.fetch("chr1") {
    ///     Ok(record) => println!("Got sequence {}: {} bp", record.id(), record.seq_len()),
    ///     Err(e) => eprintln!("Error: {}", e),
    /// }
    /// ```
    pub fn fetch(&mut self, seq_id: &str) -> io::Result<FastARecord>
    {
        // An owned copy is required: `fetch_entry` borrows all of `self` mutably,
        // which cannot overlap with a reference into `self.fai_index`.
        let owned_entry = match self.fai_index.get(seq_id)
        {
            Some(found) => found.clone(),
            None =>
            {
                return Err(io::Error::new(
                    io::ErrorKind::NotFound,
                    format!("Sequence '{}' not found in index", seq_id),
                ));
            }
        };

        self.fetch_entry(&owned_entry)
    }

    /// Fetch a specific region of a sequence.
    ///
    /// The returned bytes contain only sequence data; the line terminators that
    /// wrap the sequence in the file are skipped.
    ///
    /// # Arguments
    ///
    /// * `seq_id` - The sequence identifier
    /// * `start` - 0-based start position
    /// * `end` - End position (exclusive); values past the end of the sequence
    ///   are clamped to the sequence length
    ///
    /// # Returns
    ///
    /// * `Ok(Vec<u8>)` - The sequence data for the requested region
    /// * `Err(io::Error)` - If the sequence is not found or reading fails
    ///
    /// # Errors
    ///
    /// * `NotFound` - `seq_id` is not in the index
    /// * `InvalidInput` - `start` is at or past the end of the sequence, `end` is
    ///   smaller than `start`, or the region is too large to address in memory
    /// * `InvalidData` - the index entry is inconsistent (zero bases per line, or
    ///   a line width smaller than the number of bases per line)
    /// * `UnexpectedEof` - the file ends before the requested region does
    ///
    /// # Example
    ///
    /// ```no_run
    /// use fastx::indexed::IndexedFastXReader;
    /// use std::path::Path;
    ///
    /// let mut reader = IndexedFastXReader::from_path(Path::new("data.fasta.gz")).unwrap();
    ///
    /// // Fetch bases 1000-2000 of chr1
    /// let region = reader.fetch_range("chr1", 1000, 2000).unwrap();
    /// println!("Region length: {} bp", region.len());
    /// ```
    pub fn fetch_range(&mut self, seq_id: &str, start: u64, end: u64) -> io::Result<Vec<u8>>
    {
        // Everything needed from the index entry is read before the reader is
        // touched, so no clone of the entry is necessary.
        let entry = self.fai_index.get(seq_id).ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::NotFound,
                format!("Sequence '{}' not found in index", seq_id),
            )
        })?;

        let length = entry.length;
        let line_bases = entry.line_bases;
        let line_width = entry.line_width;

        if start >= length
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("Start position {} beyond sequence length {}", start, length),
            ));
        }

        // Without this check `end - start` below would underflow.
        if end < start
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("End position {} is before start position {}", end, start),
            ));
        }

        // A non-empty sequence with zero bases per line would divide by zero below.
        if line_bases == 0
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Index entry for '{}' has zero bases per line", seq_id),
            ));
        }

        // Bytes between the last base of one line and the first base of the next.
        let padding = match line_width.checked_sub(line_bases)
        {
            Some(padding) if padding <= usize::MAX as u64 => padding as usize,
            _ =>
            {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!(
                        "Index entry for '{}' has invalid line layout ({} bases, {} bytes per line)",
                        seq_id, line_bases, line_width
                    ),
                ));
            }
        };

        let region_length = end.min(length) - start;
        if region_length > usize::MAX as u64
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("Requested region of {} bases is too large to hold in memory", region_length),
            ));
        }
        let region_len = region_length as usize;

        // Calculate file offset for start position
        let start_offset = entry.offset_for_position(start);

        // Seek to the start position
        self.reader.seek_uncompressed(start_offset)?;

        // The output buffer is filled in place, one line-sized slice at a time;
        // `skip_buf` is a reusable scratch area for the line terminators.
        let mut seq_data = vec![0u8; region_len];
        let mut skip_buf = vec![0u8; padding];
        let mut filled = 0usize;
        let mut col = start % line_bases;

        while filled < region_len
        {
            // At the end of a line's bases: discard the terminator bytes. They are
            // read rather than seeked over because a relative seek in the
            // compressed stream would reset the decompressor state.
            if col >= line_bases
            {
                self.reader.read_exact(&mut skip_buf)?;
                col = 0;
            }

            // Never read past the end of the current line or of the region.
            let want = std::cmp::min((region_len - filled) as u64, line_bases - col) as usize;

            self.reader
                .read_exact(&mut seq_data[filled..filled + want])
                .map_err(|e| match e.kind()
                {
                    io::ErrorKind::UnexpectedEof => io::Error::new(
                        io::ErrorKind::UnexpectedEof,
                        "Unexpected end of file while reading sequence",
                    ),
                    _ => e,
                })?;

            filled += want;
            col += want as u64;
        }

        Ok(seq_data)
    }

    /// Fetch a sequence using its FAI entry directly.
    ///
    /// The FAI offset points at the first base, i.e. just past the header line.
    /// This reads a bounded window of bytes ending at that offset, locates the
    /// start of the header line inside it, and parses the record from there.
    ///
    /// # Errors
    ///
    /// * `InvalidData` - no line starting with `>` ends at the entry's offset
    ///   within the search window (for example, a header longer than the window)
    /// * Any error from seeking, reading, or parsing the record
    fn fetch_entry(&mut self, entry: &FaiEntry) -> io::Result<FastARecord>
    {
        // Search up to 4KB backwards which should be enough for any header.
        const MAX_HEADER_SEARCH: u64 = 4096;

        let window_start = entry.offset.saturating_sub(MAX_HEADER_SEARCH);

        // Seek to where the header might start
        self.reader.seek_uncompressed(window_start)?;

        // At most MAX_HEADER_SEARCH bytes, so the cast cannot truncate.
        let mut window = vec![0u8; (entry.offset - window_start) as usize];
        self.reader.read_exact(&mut window)?;

        // The window ends with the header line's terminator ("\n" or "\r\n").
        // Drop it, then the header starts right after the previous newline (or at
        // the start of the window if there is none). Searching for the last '>'
        // instead would land inside headers whose text contains '>'.
        let header_end = window
            .iter()
            .rposition(|&b| b != b'\n' && b != b'\r')
            .map_or(0, |pos| pos + 1);
        let line_start = window[..header_end]
            .iter()
            .rposition(|&b| b == b'\n')
            .map_or(0, |pos| pos + 1);

        if window.get(line_start) != Some(&b'>')
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Could not find FASTA header for sequence '{}'", entry.name),
            ));
        }

        let header_start = window_start + line_start as u64;

        // Seek to the header start and parse the record
        self.reader.seek_uncompressed(header_start)?;

        let mut record = FastARecord::default();
        record.read(&mut self.reader)?;

        Ok(record)
    }

    /// Get a reference to the FAI index this reader uses for sequence lookup.
    pub fn index(&self) -> &FaiIndex
    {
        &self.fai_index
    }

    /// Get a reference to the GZI index, if available.
    ///
    /// This simply forwards to the underlying BGZF reader, which owns the index.
    pub fn gzi_index(&self) -> Option<&GziIndex>
    {
        self.reader.gzi_index()
    }

    /// Check if a sequence exists in the index.
    ///
    /// Only the FAI index is consulted; the data file is not read.
    ///
    /// # Arguments
    ///
    /// * `seq_id` - The sequence identifier to check
    pub fn contains(&self, seq_id: &str) -> bool
    {
        self.fai_index.contains(seq_id)
    }

    /// Collect the names of all sequences listed in the FAI index.
    ///
    /// The order is whatever the index's own name iterator yields.
    pub fn sequence_names(&self) -> Vec<&str>
    {
        let mut names: Vec<&str> = Vec::new();
        names.extend(self.fai_index.sequence_names());
        names
    }
}

/// Download the full body of `url` into memory (requires the `url` feature).
///
/// Both a failed request and a failed body read are reported as
/// `io::ErrorKind::ConnectionRefused`; the message says which step failed.
#[cfg(feature = "url")]
#[allow(dead_code)]
fn fetch_url(url: &str) -> io::Result<Vec<u8>>
{
    // Single place that decides the error kind used for every HTTP failure.
    let refused = |message: String| io::Error::new(io::ErrorKind::ConnectionRefused, message);

    let mut body = ureq::Agent::new_with_defaults()
        .get(url)
        .call()
        .map_err(|e| refused(format!("HTTP GET request failed for {}: {}", url, e)))?
        .into_body();

    body.read_to_vec()
        .map_err(|e| refused(format!("Failed to read response body: {}", e)))
}

/// Build a FAI index from the raw bytes of a `.fai` file (for URL support).
///
/// Each non-empty line that does not start with `#` must contain exactly five
/// tab-separated columns: name, length, offset, bases per line, bytes per line.
/// Every failure is reported as `InvalidData` together with the 1-based line
/// number. When a name occurs more than once, the last occurrence wins.
#[allow(dead_code)]
fn parse_fai_from_bytes(data: &[u8]) -> io::Result<FaiIndex>
{
    use crate::fai::FaiEntry;
    use std::collections::HashMap;

    // Shared constructor for every "malformed index" error below.
    fn invalid(message: String) -> io::Error
    {
        io::Error::new(io::ErrorKind::InvalidData, message)
    }

    // Parse one numeric column, naming the column and line on failure.
    fn numeric(raw: &str, label: &str, line_no: usize) -> io::Result<u64>
    {
        raw.parse::<u64>()
            .map_err(|_| invalid(format!("Invalid {} at line {}: '{}'", label, line_no, raw)))
    }

    let text = match std::str::from_utf8(data)
    {
        Ok(text) => text,
        Err(_) => return Err(invalid("FAI data is not valid UTF-8".to_string())),
    };

    let mut entries = HashMap::new();

    for (idx, raw_line) in text.lines().enumerate()
    {
        // Line numbers in messages are 1-based and count skipped lines too.
        let line_no = idx + 1;
        let record = raw_line.trim();

        // Skip empty lines and comments
        if record.is_empty() || record.starts_with('#')
        {
            continue;
        }

        let fields: Vec<&str> = record.split('\t').collect();
        let (name, length, offset, line_bases, line_width) = match fields.as_slice()
        {
            &[name, length, offset, line_bases, line_width] =>
            {
                (name, length, offset, line_bases, line_width)
            }
            _ =>
            {
                return Err(invalid(format!(
                    "Invalid FAI format at line {}: expected 5 fields, got {}",
                    line_no,
                    fields.len()
                )));
            }
        };

        // Columns are validated left to right, so the first bad one is reported.
        let length = numeric(length, "length", line_no)?;
        let offset = numeric(offset, "offset", line_no)?;
        let line_bases = numeric(line_bases, "line_bases", line_no)?;
        let line_width = numeric(line_width, "line_width", line_no)?;

        // A line cannot occupy fewer bytes than the bases it holds.
        if line_width < line_bases
        {
            return Err(invalid(format!(
                "Invalid line_width < line_bases at line {}: {} < {}",
                line_no, line_width, line_bases
            )));
        }

        let name = name.to_string();
        entries.insert(
            name.clone(),
            FaiEntry {
                name,
                length,
                offset,
                line_bases,
                line_width,
            },
        );
    }

    // Use internal constructor to create FaiIndex
    Ok(FaiIndex { entries })
}

/// Parse GZI index from bytes (for URL support).
///
/// Layout, all integers little-endian `u64`: an entry count, followed by that
/// many `(compressed offset, uncompressed offset)` pairs. Bytes after the last
/// declared entry are ignored.
///
/// # Errors
///
/// Returns `InvalidData` when the input is shorter than the 8-byte header,
/// shorter than the declared entry count requires, or when the entries are not
/// sorted by uncompressed offset.
#[allow(dead_code)]
fn parse_gzi_from_bytes(data: &[u8]) -> io::Result<GziIndex>
{
    const HEADER_LEN: usize = 8;
    const ENTRY_LEN: usize = 16;

    // Decode exactly eight bytes as a little-endian u64.
    fn read_u64_le(bytes: &[u8]) -> u64
    {
        let mut raw = [0u8; 8];
        raw.copy_from_slice(bytes);
        u64::from_le_bytes(raw)
    }

    if data.len() < HEADER_LEN
    {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "GZI data too short (less than 8 bytes)",
        ));
    }

    let declared = read_u64_le(&data[..HEADER_LEN]);

    // The count comes from the (possibly remote) file and must not be trusted:
    // compare it with the number of entries the buffer can actually hold instead
    // of computing `8 + count * 16`, which can overflow.
    let available = ((data.len() - HEADER_LEN) / ENTRY_LEN) as u64;
    if declared > available
    {
        // Widened arithmetic so the reported size is exact for any count.
        let expected_size = HEADER_LEN as u128 + u128::from(declared) * ENTRY_LEN as u128;
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("GZI data too short: expected {} bytes, got {}", expected_size, data.len()),
        ));
    }

    // `declared <= available`, so both the cast and the slice bound are safe.
    let num_entries = declared as usize;
    let entries: Vec<(u64, u64)> = data[HEADER_LEN..HEADER_LEN + num_entries * ENTRY_LEN]
        .chunks_exact(ENTRY_LEN)
        .map(|chunk| (read_u64_le(&chunk[..8]), read_u64_le(&chunk[8..])))
        .collect();

    // Verify entries are sorted by uncompressed offset
    if entries.windows(2).any(|pair| pair[1].1 < pair[0].1)
    {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "GZI entries not sorted by uncompressed offset",
        ));
    }

    // Use internal constructor to create GziIndex
    Ok(GziIndex { entries })
}

use std::path::PathBuf;

/// Find an index file for a given data file.
///
/// Two candidates are probed, in this order, and the first one that exists is
/// returned:
///
/// 1. `<path>.<ext>` - e.g. `data.fasta.gz.fai` for `data.fasta.gz`
/// 2. `<path without its last extension>.<ext>` - e.g. `data.fasta.fai` for
///    `data.fasta.gz` (and `data.fai` for `data.fasta`)
///
/// Returns `None` when neither candidate exists. Candidate names are built from
/// the raw OS string, so paths that are not valid UTF-8 are handled as well.
fn find_index_file(path: &Path, ext: &str) -> Option<PathBuf>
{
    // Append ".<ext>" to `base` without a lossy round trip through `Display`.
    fn with_suffix(base: &Path, ext: &str) -> PathBuf
    {
        let mut name = base.as_os_str().to_os_string();
        name.push(".");
        name.push(ext);
        PathBuf::from(name)
    }

    let stem = path.with_extension("");

    // Lazy: the stem candidate is only checked when the direct one is missing.
    [path, stem.as_path()]
        .iter()
        .map(|base| with_suffix(base, ext))
        .find(|candidate| candidate.exists())
}

#[cfg(test)]
mod tests
{
    use super::*;

    /// Create an empty scratch directory under the system temp dir.
    ///
    /// Keeps fixture files out of the current working directory; the process id
    /// in the name avoids clashes between concurrent test runs.
    fn scratch_dir(label: &str) -> PathBuf
    {
        let dir = std::env::temp_dir().join(format!(
            "fastx_indexed_{}_{}",
            label,
            std::process::id()
        ));
        // Leftovers from an earlier aborted run are removed first.
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// The `<path>.<ext>` candidate is found, and the stem candidate is used when
    /// the direct one is absent.
    #[test]
    fn test_find_index_file()
    {
        let dir = scratch_dir("find");
        let fasta_path = dir.join("test_find.fasta.gz");
        let fai1 = dir.join("test_find.fasta.gz.fai");
        let fai2 = dir.join("test_find.fasta.fai");

        std::fs::write(&fasta_path, b">test\nACGT\n").unwrap();

        // Test with .gz.fai extension
        std::fs::write(&fai1, b"test\t4\t6\t4\t5\n").unwrap();
        assert_eq!(find_index_file(&fasta_path, "fai"), Some(fai1.clone()));
        std::fs::remove_file(&fai1).unwrap();

        // Test with .fai extension (for .gz file)
        std::fs::write(&fai2, b"test\t4\t6\t4\t5\n").unwrap();
        assert_eq!(find_index_file(&fasta_path, "fai"), Some(fai2.clone()));

        // Cleanup
        std::fs::remove_dir_all(&dir).unwrap();
    }

    /// No candidate exists, so the lookup yields `None`.
    #[test]
    fn test_index_file_not_found()
    {
        let path = Path::new("nonexistent.fasta.gz");
        let result = find_index_file(path, "fai");
        assert!(result.is_none());
    }

    /// Two well-formed FAI lines produce two retrievable entries.
    #[test]
    fn test_parse_fai_from_bytes()
    {
        let data = b"chr1\t1000\t0\t80\t81\nchr2\t2000\t1000\t80\t81\n";
        let index = parse_fai_from_bytes(data).unwrap();
        assert_eq!(index.len(), 2);
        assert!(index.contains("chr1"));
        assert!(index.contains("chr2"));

        let chr1 = index.get("chr1").unwrap();
        assert_eq!(chr1.length, 1000);
        assert_eq!(chr1.offset, 0);
    }

    /// A two-entry GZI buffer is decoded and can be queried by uncompressed offset.
    #[test]
    fn test_parse_gzi_from_bytes()
    {
        let data: Vec<u8> = vec![
            2, 0, 0, 0, 0, 0, 0, 0, // num_entries = 2
            0, 0, 0, 0, 0, 0, 0, 0, // Entry 0: compressed = 0
            0, 0, 0, 0, 0, 0, 0, 0, // Entry 0: uncompressed = 0
            100, 0, 0, 0, 0, 0, 0, 0, // Entry 1: compressed = 100
            0, 100, 0, 0, 0, 0, 0, 0, // Entry 1: uncompressed = 25600 (100 * 256)
        ];
        let index = parse_gzi_from_bytes(&data).unwrap();
        assert_eq!(index.len(), 2);
        assert_eq!(index.get_compressed_offset(0), Some(0));
        // 5000 lies before entry 1's uncompressed offset, so entry 0 applies.
        assert_eq!(index.get_compressed_offset(5000), Some(0));
    }
}