// lantern 0.2.3
//
// Local-first, provenance-aware semantic search for agent activity.
// Documentation
//! Gitignore-style ignore rules for the ingest walker.
//!
//! The checker is built once per ingest invocation and reused across every
//! visited path. Rules come from a `.lantern-ignore` file found by walking
//! up from the ingest path (like git finds `.gitignore`); when none is
//! present, a small set of defaults keeps common build/VCS directories out.

use anyhow::{Context, Result};
use ignore::gitignore::{Gitignore, GitignoreBuilder};
use std::fs;
use std::path::{Path, PathBuf};

/// File name of the user-supplied ignore file that is searched for in the
/// ingest directory and each of its ancestors.
pub const IGNORE_FILENAME: &str = ".lantern-ignore";

/// Directories we skip by default when the user has not supplied a
/// `.lantern-ignore`. These are almost never worth indexing and would
/// otherwise pull in large amounts of generated or vendored text.
///
/// Each entry is handed to the gitignore builder as one pattern line by
/// `IgnoreRules::load`. The trailing `/` makes every entry a directory-only
/// pattern.
pub const DEFAULT_PATTERNS: &[&str] = &[
    ".git/",
    "target/",
    "node_modules/",
    ".hermes/",
    "__pycache__/",
    ".venv/",
    "vendor/",
];

/// A compiled set of ignore rules plus the root they are matched against.
pub struct IgnoreRules {
    /// Compiled gitignore-style matcher consulted by `is_ignored`.
    matcher: Gitignore,
    /// When true, `is_ignored` skips the matcher and reports every path as
    /// not ignored.
    bypass: bool,
}

impl IgnoreRules {
    /// Disabled matcher: every `is_ignored` call returns false.
    pub fn disabled() -> Self {
        Self {
            matcher: Gitignore::empty(),
            bypass: true,
        }
    }

    /// Load rules starting from `ingest_path`. Walks up the directory tree
    /// looking for a `.lantern-ignore`; if found, uses its patterns rooted
    /// at that directory. Otherwise uses the built-in defaults rooted at
    /// the ingest directory itself.
    ///
    /// # Errors
    ///
    /// Returns an error if the discovered `.lantern-ignore` cannot be read
    /// or parsed, or if the collected patterns fail to compile.
    pub fn load(ingest_path: &Path) -> Result<Self> {
        let start_dir = start_directory(ingest_path);
        // Best effort: when canonicalization fails, fall back to the start
        // directory exactly as derived from the caller's path.
        let start_abs = fs::canonicalize(&start_dir).unwrap_or(start_dir);

        let matcher = match find_ignore_file(&start_abs) {
            Some((file, root)) => {
                let mut builder = GitignoreBuilder::new(&root);
                // `add` reports problems through its return value rather
                // than a `Result`; any reported error aborts the load.
                if let Some(err) = builder.add(&file) {
                    return Err(
                        anyhow::Error::from(err).context(format!("parse {}", file.display()))
                    );
                }
                builder
                    .build()
                    .with_context(|| format!("compile rules from {}", file.display()))?
            }
            None => {
                let mut builder = GitignoreBuilder::new(&start_abs);
                for pat in DEFAULT_PATTERNS {
                    builder
                        .add_line(None, pat)
                        .with_context(|| format!("default pattern: {pat}"))?;
                }
                builder.build().context("compile default ignore rules")?
            }
        };

        Ok(Self {
            matcher,
            bypass: false,
        })
    }

    /// Returns true if `path` (or one of its parent directories) matches an
    /// ignore rule. `is_dir` should reflect whether the path points at a
    /// directory, so directory-only patterns (`foo/`) behave correctly.
    ///
    /// A rooted `path` that does not live under the matcher's root is
    /// reported as not ignored. The underlying matcher is documented to
    /// panic on such input, and the root is canonicalized by `load`, so a
    /// path reached through a symlink could otherwise abort the ingest.
    pub fn is_ignored(&self, path: &Path, is_dir: bool) -> bool {
        if self.bypass {
            return false;
        }

        // Relativize rooted paths ourselves so the matcher never sees a
        // rooted path it cannot strip its root from.
        let candidate = if path.has_root() {
            match path.strip_prefix(self.matcher.path()) {
                Ok(relative) if !relative.has_root() => relative,
                _ => return false,
            }
        } else {
            path
        };

        self.matcher
            .matched_path_or_any_parents(candidate, is_dir)
            .is_ignore()
    }
}

/// Pick the directory from which the search for an ignore file begins.
///
/// Returns `ingest_path` itself when it is an existing directory. Otherwise
/// (a file, or a path whose metadata cannot be read) returns its parent.
///
/// `Path::parent` yields an *empty* path for a bare relative name such as
/// `notes.md`, and `None` for a root or empty path. Both cases map to `"."`
/// so the caller always gets a directory it can canonicalize and walk up
/// from.
fn start_directory(ingest_path: &Path) -> PathBuf {
    match fs::metadata(ingest_path) {
        Ok(meta) if meta.is_dir() => ingest_path.to_path_buf(),
        _ => ingest_path
            .parent()
            // An empty parent means "the current directory"; spell it out.
            .filter(|dir| !dir.as_os_str().is_empty())
            .map_or_else(|| PathBuf::from("."), Path::to_path_buf),
    }
}

/// Look for an ignore file in `start` and then in each ancestor directory,
/// nearest first.
///
/// Returns `(ignore_file_path, directory_containing_it)` for the first
/// directory that holds a regular file named `IGNORE_FILENAME`, or `None`
/// when no ancestor has one.
fn find_ignore_file(start: &Path) -> Option<(PathBuf, PathBuf)> {
    start.ancestors().find_map(|dir| {
        let ignore_file = dir.join(IGNORE_FILENAME);
        ignore_file
            .is_file()
            .then(|| (ignore_file, dir.to_path_buf()))
    })
}