use std::collections::HashMap;
use std::io;
use std::io::BufRead;
use std::path::Path;
/// One record of a FASTA index (`.fai`): where a sequence starts in the FASTA
/// file and how its lines are laid out.
#[derive(Debug, Clone, PartialEq)]
pub struct FaiEntry {
    /// Sequence name (first column of the index record).
    pub name: String,
    /// Total number of bases in the sequence.
    pub length: u64,
    /// Offset of the sequence's first base within the FASTA file.
    pub offset: u64,
    /// Number of bases on each full sequence line.
    pub line_bases: u64,
    /// Width of each full sequence line in the file, i.e. the distance between
    /// the starts of two consecutive lines (bases plus line terminator).
    pub line_width: u64,
}

impl FaiEntry {
    /// Returns the file offset of the base at 0-based position `start`.
    ///
    /// The position is split into a count of complete lines and a column
    /// within the line; every complete line advances the offset by
    /// `line_width`, the column by one per base.
    ///
    /// `start` is not checked against `length`; callers wanting a bounded
    /// region should clamp first (see [`FaiEntry::region_length`]).
    ///
    /// This method never panics:
    /// * when `line_bases` is zero (a record that holds no bases per line,
    ///   e.g. an empty sequence) the sequence's own `offset` is returned
    ///   instead of dividing by zero;
    /// * arithmetic saturates at `u64::MAX` rather than overflowing.
    pub fn offset_for_position(&self, start: u64) -> u64 {
        // A zero-width layout cannot address any base; fall back to the
        // sequence start rather than dividing by zero.
        if self.line_bases == 0 {
            return self.offset;
        }

        let full_lines = start / self.line_bases;
        let col = start % self.line_bases;

        full_lines
            .saturating_mul(self.line_width)
            .saturating_add(col)
            .saturating_add(self.offset)
    }

    /// Returns how many bases of the half-open range `[start, end)` lie
    /// inside the sequence.
    ///
    /// Both bounds are clamped to `length`, so a range reaching past the end
    /// is truncated. A range that is empty or inverted after clamping yields 0.
    pub fn region_length(&self, start: u64, end: u64) -> u64 {
        let clamped_end = end.min(self.length);
        let clamped_start = start.min(self.length);
        clamped_end.saturating_sub(clamped_start)
    }
}
/// In-memory FASTA index (`.fai`), keyed by sequence name.
///
/// Records are stored in a [`HashMap`], so iteration order is unspecified and
/// does not reflect the order of records in the index file.
#[derive(Debug, Clone)]
pub struct FaiIndex {
    /// Parsed records keyed by their `name` field.
    pub entries: HashMap<String, FaiEntry>,
}

impl FaiIndex {
    /// Opens the file at `path` and parses it as a FASTA index.
    ///
    /// # Errors
    ///
    /// Returns the error from opening or reading the file, or an
    /// `InvalidData` error for malformed content (see
    /// [`FaiIndex::from_reader`]).
    pub fn from_path(path: &Path) -> io::Result<Self> {
        let file = std::fs::File::open(path)?;
        Self::from_reader(io::BufReader::new(file))
    }

    /// Parses a FASTA index from any buffered reader.
    ///
    /// Every line is trimmed of surrounding whitespace. Blank lines and lines
    /// starting with `#` are skipped. Each remaining line must hold exactly
    /// five tab-separated fields: `name`, `length`, `offset`, `line_bases`
    /// and `line_width`, the last four being unsigned integers.
    ///
    /// If a name occurs more than once, the later record replaces the
    /// earlier one.
    ///
    /// # Errors
    ///
    /// Read errors from `reader` are propagated unchanged. An error of kind
    /// `InvalidData`, whose message carries the 1-based line number, is
    /// returned when a record
    /// * does not have exactly five fields,
    /// * has a numeric field that does not parse as `u64`,
    /// * has `line_width < line_bases`, or
    /// * has `line_bases == 0` while `length > 0` (such a layout cannot
    ///   address any base of the sequence).
    pub fn from_reader<R: BufRead>(reader: R) -> io::Result<Self> {
        let mut entries = HashMap::new();

        for (line_idx, line_result) in reader.lines().enumerate() {
            // Line numbers in messages are 1-based and count skipped lines too.
            let line_no = line_idx + 1;
            let raw = line_result?;
            let line = raw.trim();
            if line.is_empty() || line.starts_with('#') {
                continue;
            }

            let parts: Vec<&str> = line.split('\t').collect();
            if parts.len() != 5 {
                return Err(Self::invalid_data(format!(
                    "Invalid FAI format at line {}: expected 5 fields, got {}",
                    line_no,
                    parts.len()
                )));
            }

            let length = Self::parse_field(parts[1], "length", line_no)?;
            let offset = Self::parse_field(parts[2], "offset", line_no)?;
            let line_bases = Self::parse_field(parts[3], "line_bases", line_no)?;
            let line_width = Self::parse_field(parts[4], "line_width", line_no)?;

            if line_width < line_bases {
                return Err(Self::invalid_data(format!(
                    "Invalid line_width < line_bases at line {}: {} < {}",
                    line_no, line_width, line_bases
                )));
            }
            // Zero bases per line is only meaningful for an empty sequence;
            // otherwise positions could not be mapped to file offsets.
            if line_bases == 0 && length > 0 {
                return Err(Self::invalid_data(format!(
                    "Invalid line_bases at line {}: must be non-zero for a sequence of length {}",
                    line_no, length
                )));
            }

            let entry = FaiEntry {
                name: parts[0].to_string(),
                length,
                offset,
                line_bases,
                line_width,
            };
            entries.insert(entry.name.clone(), entry);
        }

        Ok(FaiIndex { entries })
    }

    /// Wraps `message` in an [`io::Error`] of kind `InvalidData`.
    fn invalid_data(message: String) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, message)
    }

    /// Parses one numeric column, naming the column and line on failure.
    fn parse_field(raw: &str, field: &str, line_no: usize) -> io::Result<u64> {
        raw.parse::<u64>().map_err(|_| {
            Self::invalid_data(format!(
                "Invalid {} at line {}: '{}'",
                field, line_no, raw
            ))
        })
    }

    /// Looks up the record for sequence `name`, if present.
    pub fn get(&self, name: &str) -> Option<&FaiEntry> {
        self.entries.get(name)
    }

    /// Returns `true` when the index has a record for sequence `name`.
    pub fn contains(&self, name: &str) -> bool {
        self.entries.contains_key(name)
    }

    /// Number of sequences in the index.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Returns `true` when the index holds no sequences.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Iterates over all sequence names, in unspecified order.
    pub fn sequence_names(&self) -> impl Iterator<Item = &str> {
        self.entries.keys().map(|s| s.as_str())
    }

    /// Iterates over all records, in unspecified order.
    pub fn entries(&self) -> impl Iterator<Item = &FaiEntry> {
        self.entries.values()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an in-memory index, as a shorthand for the reader-based tests.
    fn parse(data: &str) -> io::Result<FaiIndex> {
        FaiIndex::from_reader(io::BufReader::new(data.as_bytes()))
    }

    #[test]
    fn test_from_reader() {
        let data = "chr1\t100\t0\t80\t81\nchr2\t200\t100\t80\t81\n";
        let index = parse(data).unwrap();
        assert_eq!(index.len(), 2);
        assert!(index.contains("chr1"));
        assert!(index.contains("chr2"));
    }

    #[test]
    fn test_from_path() {
        let data = "chr1\t100\t0\t80\t81\n";
        // Use a per-process file under the OS temp directory instead of the
        // working directory, which may be read-only or shared with other runs.
        let path =
            std::env::temp_dir().join(format!("fai_index_test_{}.fai", std::process::id()));
        std::fs::write(&path, data).unwrap();
        let loaded = FaiIndex::from_path(&path);
        // Remove the fixture before asserting so a failing assertion cannot
        // leave it behind.
        std::fs::remove_file(&path).unwrap();

        let index = loaded.unwrap();
        assert_eq!(index.len(), 1);
        let chr1 = index.get("chr1").unwrap();
        assert_eq!(chr1.length, 100);
        assert_eq!(chr1.offset, 0);
        assert_eq!(chr1.line_bases, 80);
        assert_eq!(chr1.line_width, 81);
    }

    #[test]
    fn test_offset_for_position() {
        let entry = FaiEntry {
            name: "test".to_string(),
            length: 1000,
            offset: 100,
            line_bases: 80,
            line_width: 81,
        };
        assert_eq!(entry.offset_for_position(0), 100);
        assert_eq!(entry.offset_for_position(79), 179);
        // Position 80 starts the second line, skipping the line terminator.
        assert_eq!(entry.offset_for_position(80), 181);
        assert_eq!(entry.offset_for_position(100), 201);
    }

    #[test]
    fn test_region_length() {
        let entry = FaiEntry {
            name: "test".to_string(),
            length: 1000,
            offset: 0,
            line_bases: 80,
            line_width: 81,
        };
        assert_eq!(entry.region_length(100, 200), 100);
        // The end is clamped to the sequence length.
        assert_eq!(entry.region_length(900, 2000), 100);
    }

    #[test]
    fn test_empty_lines_and_comments() {
        let data = "# comment\n\nchr1\t100\t0\t80\t81\n\n";
        let index = parse(data).unwrap();
        assert_eq!(index.len(), 1);
    }

    #[test]
    fn test_rejects_wrong_field_count() {
        let err = parse("chr1\t100\t0\t80\n").unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }

    #[test]
    fn test_rejects_non_numeric_field() {
        // The comment line still counts towards the reported line number.
        let err = parse("# header\nchr1\tabc\t0\t80\t81\n").unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
        assert!(err.to_string().contains("line 2"));
    }

    #[test]
    fn test_rejects_line_width_smaller_than_line_bases() {
        let err = parse("chr1\t100\t0\t81\t80\n").unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }
}