Skip to main content

fgumi_simd_fastq/
lib.rs

1//! SIMD-accelerated FASTQ parsing using Helicase-style bitmask operations.
2//!
3//! This crate provides high-throughput FASTQ parsing by processing 64 bytes at a time
4//! through SIMD registers (NEON on ARM, AVX2 on `x86_64`), classifying newline characters
5//! via bitmask operations and finding record boundaries without per-byte branching.
6//!
7//! # Architecture
8//!
9//! 1. **Lexer**: Loads 64-byte blocks into SIMD registers, produces a `u64` bitmask where
10//!    bit `i` is set if byte `i` is a newline (`\n`).
11//! 2. **Parser**: Walks the newline bitmask with `trailing_zeros()` to find record
12//!    boundaries. Every 4th newline marks the end of a FASTQ record.
13//!
14//! # Example
15//!
16//! ```
17//! use fgumi_simd_fastq::{find_record_offsets, parse_records};
18//!
19//! let fastq = b"@r1\nACGT\n+\nIIII\n@r2\nTTTT\n+\nJJJJ\n";
20//! let offsets = find_record_offsets(fastq);
21//! assert_eq!(offsets, vec![0, 16, 32]); // two 16-byte records: starts at 0 and 16, end at 32
22//!
23//! let records: Vec<_> = parse_records(fastq).collect();
24//! assert_eq!(records.len(), 2);
25//! assert_eq!(records[0].name, b"r1");
26//! assert_eq!(records[0].sequence, b"ACGT");
27//! ```
28
29mod bitmask;
30mod lexer;
31mod parser;
32mod reader;
33
34pub use bitmask::FastqBitmask;
35pub use lexer::lex_block_full;
36pub use parser::{FastqRecord, find_record_offsets, parse_records};
37pub use reader::SimdFastqReader;