gfa 0.10.1

Library for working with graphs in the GFA (Graphical Fragment Assembly) format
Documentation
use crate::{
    gfa::{Line, Link, Path, Segment},
    parser::GFAParser,
};

use anyhow::{bail, Result};

use memmap::Mmap;

use std::fs::File;
use std::io::prelude::*;

use bstr::ByteSlice;

/// Line-oriented reader over a memory-mapped GFA file.
///
/// Lines are copied one at a time out of the mapping into `line_buf` by
/// `next_line`; the remaining fields describe the line that was read last.
#[derive(Debug)]
pub struct MmapGFA {
    /// Cursor over the memory-mapped file; its position is the byte offset
    /// at which the next call to `next_line` starts reading.
    pub cursor: std::io::Cursor<Mmap>,
    /// Reusable buffer that `next_line` clears and then fills with the
    /// bytes of the line it reads.
    pub line_buf: Vec<u8>,
    /// Number of bytes read by the most recent `next_line` call
    /// (0 when nothing was left to read).
    pub current_line_len: usize,
    /// Cursor position recorded by `next_line` just before it read the
    /// most recent line, i.e. the byte offset where that line starts.
    pub last_buf_offset: usize,
    /// Parser used by `parse_current_line` and cloned into the iterators
    /// returned by `iter_segments`, `iter_links` and `iter_paths`.
    pub parser: GFAParser<usize, ()>,
}

/// The three kinds of GFA line that this module indexes and iterates over.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineType {
    /// A segment line (first byte `S`).
    Segment,
    /// A link line (first byte `L`).
    Link,
    /// A path line (first byte `P`).
    Path,
}

/// Byte offsets of the segment, link and path lines of a GFA file, as
/// produced by `MmapGFA::build_index`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineIndices {
    /// For every line starting with `S`: `(byte offset of the line start,
    /// length of the line in bytes)`. The length includes the trailing
    /// `\n` when the line has one.
    pub segments: Vec<(usize, usize)>,
    /// Byte offset of the start of every line starting with `L`.
    pub links: Vec<usize>,
    /// Byte offset of the start of every line starting with `P`.
    pub paths: Vec<usize>,
}

/// Iterator over the segment lines of an `MmapGFA`, created by
/// `MmapGFA::iter_segments`.
///
/// It advances the underlying reader's cursor as it goes, which is why it
/// holds a mutable borrow of the reader.
#[derive(Debug)]
pub struct SegmentIter<'a> {
    // Reader whose lines are consumed by this iterator.
    mmap: &'a mut MmapGFA,
    // Parser owned by the iterator (a clone of the reader's parser).
    parser: GFAParser<usize, ()>,
}

impl<'a> Iterator for SegmentIter<'a> {
    type Item = Segment<usize, ()>;

    /// Returns the next line that starts with `S` and parses as a segment.
    ///
    /// Lines of any other kind, and `S` lines that fail to parse, are
    /// skipped. Iteration ends (`None`) when the reader reports an error or
    /// when the end of the input is reached.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let line = self.mmap.next_line().ok()?;

            // `next_line` yields an empty slice only once the input is
            // exhausted (even a blank line still contains its `\n`), so
            // this is the end-of-file condition. Without this check the
            // iterator would spin forever at EOF.
            if line.is_empty() {
                return None;
            }

            if let Some(b'S') = line.first() {
                if let Ok(Line::Segment(segment)) =
                    self.parser.parse_gfa_line(line)
                {
                    return Some(segment);
                }
            }
        }
    }
}

/// Iterator over the link lines of an `MmapGFA`, created by
/// `MmapGFA::iter_links`.
///
/// It advances the underlying reader's cursor as it goes, which is why it
/// holds a mutable borrow of the reader.
#[derive(Debug)]
pub struct LinkIter<'a> {
    // Reader whose lines are consumed by this iterator.
    mmap: &'a mut MmapGFA,
    // Parser owned by the iterator (a clone of the reader's parser).
    parser: GFAParser<usize, ()>,
}

impl<'a> Iterator for LinkIter<'a> {
    type Item = Link<usize, ()>;

    /// Returns the next line that starts with `L` and parses as a link.
    ///
    /// Lines of any other kind, and `L` lines that fail to parse, are
    /// skipped. Iteration ends (`None`) when the reader reports an error or
    /// when the end of the input is reached.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let line = self.mmap.next_line().ok()?;

            // `next_line` yields an empty slice only once the input is
            // exhausted (even a blank line still contains its `\n`), so
            // this is the end-of-file condition. Without this check the
            // iterator would spin forever at EOF.
            if line.is_empty() {
                return None;
            }

            // Link lines start with `L`; filtering on `S` here would only
            // ever see segment lines and never produce a link.
            if let Some(b'L') = line.first() {
                if let Ok(Line::Link(link)) =
                    self.parser.parse_gfa_line(line)
                {
                    return Some(link);
                }
            }
        }
    }
}

/// Iterator over the path lines of an `MmapGFA`, created by
/// `MmapGFA::iter_paths`.
///
/// It advances the underlying reader's cursor as it goes, which is why it
/// holds a mutable borrow of the reader.
#[derive(Debug)]
pub struct PathIter<'a> {
    // Reader whose lines are consumed by this iterator.
    mmap: &'a mut MmapGFA,
    // Parser owned by the iterator (a clone of the reader's parser).
    parser: GFAParser<usize, ()>,
}

impl<'a> Iterator for PathIter<'a> {
    type Item = Path<usize, ()>;

    /// Returns the next line that starts with `P` and parses as a path.
    ///
    /// Lines of any other kind, and `P` lines that fail to parse, are
    /// skipped. Iteration ends (`None`) when the reader reports an error or
    /// when the end of the input is reached.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let line = self.mmap.next_line().ok()?;

            // `next_line` yields an empty slice only once the input is
            // exhausted (even a blank line still contains its `\n`), so
            // this is the end-of-file condition. Without this check the
            // iterator would spin forever at EOF.
            if line.is_empty() {
                return None;
            }

            // Path lines start with `P`; filtering on `S` here would only
            // ever see segment lines and never produce a path.
            if let Some(b'P') = line.first() {
                if let Ok(Line::Path(path)) =
                    self.parser.parse_gfa_line(line)
                {
                    return Some(path);
                }
            }
        }
    }
}

impl MmapGFA {
    /// Opens the file at `path`, memory-maps it, and returns a reader
    /// positioned at the start of the file with an empty current line.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or mapped.
    pub fn new(path: &str) -> Result<Self> {
        let file = File::open(path)?;
        // SAFETY: NOTE(review): the mapping is only ever read through this
        // type, but soundness also requires that the underlying file is not
        // truncated or modified by anyone else while it is mapped. That
        // cannot be enforced from here and is left to the caller.
        let mmap = unsafe { Mmap::map(&file)? };

        Ok(Self {
            cursor: std::io::Cursor::new(mmap),
            line_buf: Vec::with_capacity(1024),
            current_line_len: 0,
            last_buf_offset: 0,
            parser: GFAParser::new(),
        })
    }

    /// Moves the cursor back to the start of the file and returns the
    /// position it had before the call.
    pub fn reset_position(&mut self) -> u64 {
        self.set_position(0)
    }

    /// Moves the cursor to byte offset `new_pos` and returns the position
    /// it had before the call.
    pub fn set_position(&mut self, new_pos: u64) -> u64 {
        let cur_pos = self.cursor.position();
        self.cursor.set_position(new_pos);
        cur_pos
    }

    /// Returns the full contents of the memory-mapped file.
    pub fn get_ref(&self) -> &[u8] {
        self.cursor.get_ref().as_ref()
    }

    /// Returns the parser used by this reader.
    pub fn get_parser(&self) -> &GFAParser<usize, ()> {
        &self.parser
    }

    /// Reads the line starting at the current cursor position into the
    /// internal buffer and returns it.
    ///
    /// The returned slice includes the trailing `\n` when the line has
    /// one. An empty slice means there was nothing left to read. The
    /// offset at which the line started is stored in `last_buf_offset`,
    /// and its length in `current_line_len`.
    ///
    /// # Errors
    ///
    /// Returns an error if reading from the cursor fails.
    pub fn next_line(&mut self) -> Result<&[u8]> {
        self.line_buf.clear();

        self.last_buf_offset = self.cursor.position() as usize;

        let n_read = self.cursor.read_until(b'\n', &mut self.line_buf)?;

        self.current_line_len = n_read;

        Ok(&self.line_buf[..n_read])
    }

    /// Moves the cursor to byte `offset` and reads the line starting
    /// there; see [`MmapGFA::next_line`] for the details of the result.
    ///
    /// # Errors
    ///
    /// Returns an error if reading from the cursor fails.
    pub fn read_line_at(&mut self, offset: usize) -> Result<&[u8]> {
        self.cursor.set_position(offset as u64);
        self.next_line()
    }

    /// Scans the whole file and records where its segment (`S`), link
    /// (`L`) and path (`P`) lines start.
    ///
    /// Segments are recorded as `(offset, length)`, where the length
    /// includes the trailing `\n` if present; links and paths are recorded
    /// by offset only. Lines starting with any other byte are ignored.
    ///
    /// The scan runs directly over the mapped bytes, so the cursor
    /// position, the current line buffer and its bookkeeping fields are
    /// left exactly as they were. Scanning through `next_line` instead
    /// would overwrite the buffer and leave `current_line_len` pointing
    /// past its end.
    ///
    /// # Errors
    ///
    /// This implementation never fails; the `Result` is kept so the
    /// signature stays compatible with existing callers.
    pub fn build_index(&mut self) -> Result<LineIndices> {
        let mut segments = Vec::new();
        let mut links = Vec::new();
        let mut paths = Vec::new();

        let bytes = self.get_ref();
        let mut line_start = 0;

        while line_start < bytes.len() {
            let rest = &bytes[line_start..];
            // A line runs up to and including the next `\n`, or to the end
            // of the file when the last line is unterminated.
            let length =
                rest.find_byte(b'\n').map_or(rest.len(), |ix| ix + 1);

            match rest.first() {
                Some(b'S') => segments.push((line_start, length)),
                Some(b'L') => links.push(line_start),
                Some(b'P') => paths.push(line_start),
                _ => (),
            }

            line_start += length;
        }

        Ok(LineIndices {
            segments,
            links,
            paths,
        })
    }

    /// Returns the bytes of the line most recently read by `next_line`
    /// (empty if no line has been read, or the last read hit end of file).
    pub fn current_line(&self) -> &[u8] {
        &self.line_buf[..self.current_line_len]
    }

    /// Returns the second tab-separated field of the current line, or
    /// `None` if the line has fewer than two fields.
    ///
    /// No trimming is performed: if the second field is also the last one,
    /// it includes the line terminator.
    pub fn current_line_name(&self) -> Option<&[u8]> {
        // Field 0 is the line-type tag; field 1 is what is returned.
        self.current_line().split_str("\t").nth(1)
    }

    /// Parses the current line with this reader's parser.
    ///
    /// # Errors
    ///
    /// Returns an error if the current line is empty or if the parser
    /// rejects it.
    pub fn parse_current_line(&self) -> Result<Line<usize, ()>> {
        let line = self.current_line();
        if line.is_empty() {
            bail!("Line at offset {} is empty", self.last_buf_offset);
        }

        let gfa_line = self.parser.parse_gfa_line(line)?;
        Ok(gfa_line)
    }

    /// Returns an iterator over segment lines, starting at the beginning
    /// of the file if `from_start` is true and at the current cursor
    /// position otherwise.
    pub fn iter_segments(&mut self, from_start: bool) -> SegmentIter<'_> {
        if from_start {
            self.cursor.set_position(0);
        }
        // The iterator borrows `self` mutably, so it gets its own parser.
        let parser = self.parser.clone();
        SegmentIter { mmap: self, parser }
    }

    /// Returns an iterator over link lines, starting at the beginning of
    /// the file if `from_start` is true and at the current cursor position
    /// otherwise.
    pub fn iter_links(&mut self, from_start: bool) -> LinkIter<'_> {
        if from_start {
            self.cursor.set_position(0);
        }
        // The iterator borrows `self` mutably, so it gets its own parser.
        let parser = self.parser.clone();
        LinkIter { mmap: self, parser }
    }

    /// Returns an iterator over path lines, starting at the beginning of
    /// the file if `from_start` is true and at the current cursor position
    /// otherwise.
    pub fn iter_paths(&mut self, from_start: bool) -> PathIter<'_> {
        if from_start {
            self.cursor.set_position(0);
        }
        // The iterator borrows `self` mutably, so it gets its own parser.
        let parser = self.parser.clone();
        PathIter { mmap: self, parser }
    }
}