nvs-core 0.1.0

Native Vector Store (Rust) core: bundle format, mmap reader, vector/BM25/hybrid search
Documentation
use super::SemanticUnit;

/// A chunk assembled by [`pack_initial_chunks`] from one or more consecutive
/// semantic units.
#[derive(Clone, Debug)]
pub struct TmpChunk {
    /// Concatenation of the text of every unit packed into this chunk.
    pub text: String,
    /// Sum of the `total_tokens` of the units packed into this chunk.
    pub tokens: usize,
    /// First entry (in iteration order) of the `pages` of the first unit packed
    /// into the chunk. `pack_initial_chunks` uses `-1` as its "not yet set"
    /// marker and falls back to `0` when that unit lists no pages.
    pub start_page: i32,
    /// Last entry (in iteration order) of the `pages` of the most recently
    /// packed unit that listed any pages; stays `-1` if no unit did.
    pub end_page: i32,
    /// `true` when at least one packed unit had `has_major_heading` set.
    pub has_major_heading: bool,
    /// Smallest `min_heading_level` among the packed units that have a major
    /// heading; `i32::MAX` when no such unit was packed.
    pub min_heading_level: i32,
}

/// Greedily packs consecutive semantic units into chunks bounded by a token budget.
///
/// Units are visited in order and appended to the chunk under construction.
/// When appending the next unit would push the running token count above
/// `max_tokens` and the chunk under construction already holds some text, that
/// chunk is emitted and a fresh one is started with the unit.
///
/// A unit is always appended when the chunk under construction has no text yet,
/// so a chunk may exceed `max_tokens` (for example a single oversized unit).
///
/// # Parameters
/// * `units` - semantic units, in the order they should appear in the output.
/// * `max_tokens` - token budget applied to each chunk.
///
/// # Returns
/// The chunks in input order. Chunks whose text is empty are never emitted, so
/// an empty `units` slice yields an empty vector.
pub fn pack_initial_chunks(units: &[SemanticUnit], max_tokens: usize) -> Vec<TmpChunk> {
    // Builds the blank accumulator: no text, zero tokens, `-1` page markers
    // meaning "unset", and `i32::MAX` as the identity for the heading minimum.
    let blank = || TmpChunk {
        text: String::new(),
        tokens: 0,
        start_page: -1,
        end_page: -1,
        has_major_heading: false,
        min_heading_level: i32::MAX,
    };

    let mut chunks: Vec<TmpChunk> = Vec::new();
    let mut pending = blank();

    for unit in units {
        let unit_text = unit.text();
        let projected_tokens = pending.tokens + unit.total_tokens;

        // Flush only a chunk that already carries text; otherwise keep filling it.
        let over_budget = projected_tokens > max_tokens;
        if over_budget && !pending.text.is_empty() {
            chunks.push(std::mem::replace(&mut pending, blank()));
        }

        // The first unit of a chunk fixes its start page (0 if the unit has no pages).
        if pending.start_page < 0 {
            pending.start_page = unit.pages.iter().next().copied().unwrap_or(0);
        }
        // Any unit that lists pages moves the end page forward to its last entry.
        if let Some(&last_page) = unit.pages.iter().next_back() {
            pending.end_page = last_page;
        }

        pending.text.push_str(&unit_text);
        pending.tokens += unit.total_tokens;

        // Heading level is tracked only for units flagged as having a major heading.
        if unit.has_major_heading {
            pending.has_major_heading = true;
            if unit.min_heading_level < pending.min_heading_level {
                pending.min_heading_level = unit.min_heading_level;
            }
        }
    }

    // Emit the trailing chunk unless nothing textual was accumulated.
    if !pending.text.is_empty() {
        chunks.push(pending);
    }
    chunks
}