Mem_Parser 0.1.0

Zero-copy log parser with mmap input, streaming lines, and optional bump arena AST
Documentation
//! Contiguous backing storage for parsers: borrowed bytes/slice or memory-mapped file.
//!
//! # Safety contract for [`MappedFile`] (mmap)
//!
//! Maps the file **read-only**. UB if the backing file is modified or its length changes
//! concurrently (write, truncate, remap) during use of the mapped slice—as with any mmap
//! consumer—or if the filesystem layer returns inconsistent views. Prefer opening the mapping
//! only after ingestion has finished when possible.
//!
//! The parser does not synchronize with external writers.

use crate::ParseError;

use std::fmt;
use std::fs::File;
use std::io;
use std::path::Path;

/// Read-only mmap of an entire file, owning the [`memmap2::Mmap`] handle.
///
/// Lifetimes from [`MappedFile::as_bytes`] / [`MappedFile::as_str_checked`] attach to **`self`**.
pub struct MappedFile {
    /// Underlying mapping handle; every byte accessor on this type borrows from it.
    mmap: memmap2::Mmap,
}

impl fmt::Debug for MappedFile {
    /// Formats as a struct named `MappedFile` with a single `len` field.
    ///
    /// Only the byte length of the mapping is printed; the mapped contents are never dumped.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mapped_len = self.mmap.len();
        let mut builder = f.debug_struct("MappedFile");
        builder.field("len", &mapped_len);
        builder.finish()
    }
}

impl MappedFile {
    /// Opens `path` and memory-maps it read-only.
    ///
    /// # Errors
    ///
    /// Propagates the [`io::Error`] from opening the file or from creating the mapping.
    pub fn map_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        let handle = File::open(path)?;
        // SAFETY: the mapping is only ever handed out as shared `&[u8]` borrowed from `self`.
        // Soundness additionally relies on the module-level safety contract: the backing file
        // must not be truncated/remapped while the mapping is in use.
        let mapping = unsafe { memmap2::MmapOptions::new().map(&handle) }?;
        Ok(Self { mmap: mapping })
    }

    /// Returns the whole mapping as a byte slice borrowed from `self`.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        let bytes: &[u8] = &self.mmap;
        bytes
    }

    /// Validates the entire mapping as UTF-8 and returns it as `&str`
    /// (use for smaller files or when the upfront cost is acceptable).
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::InvalidUtf8`] whose `byte` is the length of the valid UTF-8 prefix.
    pub fn as_str_checked(&self) -> Result<&str, ParseError> {
        match std::str::from_utf8(self.as_bytes()) {
            Ok(text) => Ok(text),
            Err(utf8_err) => Err(ParseError::InvalidUtf8 {
                byte: utf8_err.valid_up_to(),
            }),
        }
    }

    /// Number of bytes in the mapping.
    #[inline]
    pub fn len(&self) -> usize {
        self.as_bytes().len()
    }

    /// `true` when the mapping contains no bytes.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Builds a [`crate::LogStream`] over the mapped bytes, forwarding `dialect` and
    /// `max_line_bytes` unchanged to [`crate::LogStream::new`].
    #[inline]
    pub fn log_stream(
        &self,
        dialect: crate::LogDialect,
        max_line_bytes: Option<usize>,
    ) -> crate::LogStream<'_> {
        let bytes = self.as_bytes();
        crate::LogStream::new(bytes, dialect, max_line_bytes)
    }
}

/// Convenience holder for contiguous input (`mmap` vs caller-owned/borrowed string).
///
/// Borrowed variants keep zero-copy `'src`; [`MappedFile`] is owned internally.
#[derive(Debug)]
pub enum Source<'src> {
    /// UTF-8 string slice (validated by type).
    Str(&'src str),
    /// Raw bytes validated per line or whole-buffer by API you call.
    Bytes(&'src [u8]),
    /// Owned read-only memory map of a whole file; bytes are not validated as UTF-8 until an
    /// accessor such as [`Source::whole_str_checked`] is called.
    Mapped(MappedFile),
}

impl<'src> Source<'src> {
    /// Wraps a borrowed string slice as [`Source::Str`].
    #[inline]
    pub fn borrowed_str(s: &'src str) -> Self {
        Self::Str(s)
    }

    /// Wraps a borrowed byte slice as [`Source::Bytes`]; no validation happens here.
    #[inline]
    pub fn borrowed_bytes(b: &'src [u8]) -> Self {
        Self::Bytes(b)
    }

    /// Takes ownership of a [`MappedFile`] as [`Source::Mapped`].
    #[inline]
    pub fn mapped(m: MappedFile) -> Self {
        Self::Mapped(m)
    }

    /// Returns the full buffer as `&str`, validating it as UTF-8 where the variant requires it.
    ///
    /// [`Source::Str`] is returned as-is, [`Source::Bytes`] is validated here, and
    /// [`Source::Mapped`] defers to [`MappedFile::as_str_checked`].
    ///
    /// # Errors
    ///
    /// For [`Source::Bytes`], returns [`ParseError::InvalidUtf8`] whose `byte` is the length of
    /// the valid UTF-8 prefix; for [`Source::Mapped`], returns whatever the mapped file reports.
    pub fn whole_str_checked(&self) -> Result<&str, ParseError> {
        match self {
            Self::Mapped(file) => file.as_str_checked(),
            Self::Str(text) => Ok(*text),
            Self::Bytes(raw) => match std::str::from_utf8(*raw) {
                Ok(text) => Ok(text),
                Err(utf8_err) => Err(ParseError::InvalidUtf8 {
                    byte: utf8_err.valid_up_to(),
                }),
            },
        }
    }

    /// Returns the full buffer as raw bytes, whichever variant backs it.
    #[inline]
    pub fn as_bytes_full(&self) -> &[u8] {
        match *self {
            Source::Mapped(ref file) => file.as_bytes(),
            Source::Bytes(raw) => raw,
            Source::Str(text) => text.as_bytes(),
        }
    }
}