lantern 0.3.0

Local-first, provenance-aware semantic search for agent activity
Documentation
//! Display full provenance and chunk text for a single indexed source.
//!
//! `show` accepts either a full source id or an unambiguous prefix (like
//! git's short hashes) so agents and humans can copy the 32-character id
//! printed by inspect or trim it down for interactive use. The rendered
//! output reuses `export::ExportedSource` so JSON consumers see the same
//! shape they'd get from a single-source `export`.

use anyhow::{Context, Result};
use rusqlite::params;

use crate::export::{ExportedSource, LoadSourceOptions, load_source_with_options};
use crate::inspect::{ago, now_unix};
use crate::search::format_confidence_breakdown_token;
use crate::store::Store;

/// Display-only switches for the `show` command.
///
/// This is deliberately a different type from `LoadSourceOptions`: knobs that
/// only make sense for `show` (for example truncating long chunks) can be
/// added here without widening the lower-level export loader's surface.
#[derive(Clone, Copy, Debug, Default)]
pub struct ShowOptions {
    /// Upper bound on the number of extracted entities attached to each
    /// chunk's `entities` field, so chunk-level knowledge-graph evidence can
    /// be inspected next to the chunk text. Entities are only populated for
    /// `Some(n)` with `n > 0`; `None` and `Some(0)` keep the default flat
    /// output.
    pub with_entities: Option<usize>,
}

/// Resolve `id_or_prefix` to a single source and load it using the default
/// `ShowOptions` (no per-chunk entities).
///
/// This is a convenience wrapper around `show_with_options`; any error it
/// produces is returned unchanged.
pub fn show(store: &Store, id_or_prefix: &str) -> Result<ExportedSource> {
    let defaults = ShowOptions::default();
    show_with_options(store, id_or_prefix, defaults)
}

/// Resolve `id_or_prefix` to exactly one source and load it for display.
///
/// The input is trimmed and then matched as a *literal* prefix of
/// `sources.id`. SQL `LIKE` metacharacters in the input (`%`, `_`) and the
/// escape character itself (`\`) are escaped before binding, so a stray `%`
/// or `_` can never silently widen the match to unrelated sources.
///
/// `opts.with_entities` is forwarded to the export loader unchanged.
///
/// # Errors
///
/// Returns an error when:
/// - the trimmed input is empty;
/// - no source id starts with the input;
/// - more than one source id starts with the input (ambiguous prefix);
/// - the lookup query or the subsequent source load fails.
pub fn show_with_options(
    store: &Store,
    id_or_prefix: &str,
    opts: ShowOptions,
) -> Result<ExportedSource> {
    let trimmed = id_or_prefix.trim();
    if trimmed.is_empty() {
        anyhow::bail!("source id must not be empty");
    }

    // Build the LIKE pattern ourselves: escape every metacharacter in the
    // user-supplied text, then append the one wildcard we actually want.
    let mut pattern = String::with_capacity(trimmed.len() + 1);
    for ch in trimmed.chars() {
        if matches!(ch, '\\' | '%' | '_') {
            pattern.push('\\');
        }
        pattern.push(ch);
    }
    pattern.push('%');

    let conn = store.conn();
    let candidates: Vec<String> = {
        // LIMIT 2 is enough to tell "unique" from "ambiguous" without
        // fetching every match for a very short prefix.
        let mut stmt = conn.prepare(
            "SELECT id FROM sources WHERE id LIKE ?1 ESCAPE '\\' ORDER BY id LIMIT 2",
        )?;
        let rows = stmt.query_map(params![pattern], |row| row.get::<_, String>(0))?;
        rows.collect::<Result<Vec<_>, _>>()?
    };

    match candidates.as_slice() {
        [] => anyhow::bail!("no source matches id {trimmed:?}"),
        [only] => {
            let load_opts = LoadSourceOptions {
                with_entities: opts.with_entities,
            };
            load_source_with_options(conn, only, now_unix(), load_opts)
                .with_context(|| format!("loading source {}", only))
        }
        many => anyhow::bail!(
            "source id {trimmed:?} is ambiguous; try a longer prefix (matched at least {} sources)",
            many.len()
        ),
    }
}

/// Build the space-separated `key=value` metadata line printed under a
/// chunk header in the text rendering.
///
/// Optional fields (`role`, `session`, `turn`, `parent_turn_id`, `tool`,
/// `tool_call_id`, `ts`, `last_accessed_at`, `access_decay_at`) are emitted
/// only when present, and the counters (`access_count`, `feedback_score`,
/// `query_success_count`) only when non-zero. The `confidence`,
/// `freshness_source` and confidence-breakdown tokens are always emitted, in
/// that order, at the end of the line.
///
/// Because those trailing tokens are unconditional the result is always
/// `Some`; the `Option` return type is kept so existing callers keep working.
fn chunk_metadata_line(chunk: &crate::export::ExportedChunk) -> Option<String> {
    let mut parts = Vec::new();

    // Conversation / tool provenance, when the chunk carries it.
    if let Some(role) = &chunk.role {
        parts.push(format!("role={role}"));
    }
    if let Some(session_id) = &chunk.session_id {
        parts.push(format!("session={session_id}"));
    }
    if let Some(turn_id) = &chunk.turn_id {
        parts.push(format!("turn={turn_id}"));
    }
    if let Some(parent_turn_id) = &chunk.parent_turn_id {
        parts.push(format!("parent_turn_id={parent_turn_id}"));
    }
    if let Some(tool_name) = &chunk.tool_name {
        parts.push(format!("tool={tool_name}"));
    }
    if let Some(tool_call_id) = &chunk.tool_call_id {
        parts.push(format!("tool_call_id={tool_call_id}"));
    }
    if let Some(ts) = chunk.timestamp_unix {
        parts.push(format!("ts={ts}"));
    }

    // Access and feedback signals; zero counters are suppressed to keep the
    // line short.
    if chunk.access_count != 0 {
        parts.push(format!("access_count={}", chunk.access_count));
    }
    if let Some(last) = chunk.last_accessed_at {
        parts.push(format!("last_accessed_at={last}"));
    }
    if let Some(decay_at) = chunk.access_decay_at {
        parts.push(format!("access_decay_at={decay_at}"));
    }
    if chunk.feedback_score != 0 {
        parts.push(format!("feedback_score={}", chunk.feedback_score));
    }
    if chunk.query_success_count != 0 {
        parts.push(format!("query_success_count={}", chunk.query_success_count));
    }

    // Always-present confidence tokens. These guarantee `parts` is never
    // empty, so no empty-line fallback is needed.
    parts.push(format!("confidence={:.3}", chunk.confidence));
    parts.push(format!(
        "freshness_source={}",
        chunk.confidence_breakdown.freshness_source.as_str(),
    ));
    parts.push(format_confidence_breakdown_token(
        &chunk.confidence_breakdown,
    ));

    Some(parts.join(" "))
}

/// Write the human-readable rendering of `source` to stdout.
///
/// A header of source-level fields comes first (`path` and `mtime` only when
/// present), followed by one section per chunk: a `--- chunk ... ---` banner,
/// the metadata line, the optional entities line, and the chunk text. A
/// newline is appended after the text when it does not already end with one.
pub fn print_text(source: &ExportedSource) {
    println!("source:   {}", source.source_id);
    println!("uri:      {}", source.uri);
    match &source.path {
        Some(p) => println!("path:     {}", p),
        None => {}
    }
    println!("kind:     {}", source.kind);
    println!("bytes:    {}", source.bytes);
    println!("sha256:   {}", source.content_sha256);

    // One clock reading shared by every relative-age annotation below.
    let now = now_unix();
    let ingested_ago = ago(now, source.ingested_at);
    println!("ingested: {} ({ingested_ago})", source.ingested_at);
    if let Some(m) = source.mtime_unix {
        let mtime_ago = ago(now, m);
        println!("mtime:    {m} ({mtime_ago})");
    }
    println!("chunks:   {}", source.chunks.len());

    for chunk in source.chunks.iter() {
        // Only the first 12 bytes of the chunk hash are shown (fewer when the
        // stored hash is shorter than that).
        let sha_len = chunk.sha256.len().min(12);
        let short_sha = &chunk.sha256[..sha_len];

        println!();
        println!(
            "--- chunk {} [bytes {}..{}, chars {}, sha {}] ---",
            chunk.ordinal, chunk.byte_start, chunk.byte_end, chunk.char_count, short_sha,
        );

        let metadata = chunk_metadata_line(chunk);
        if let Some(meta) = metadata {
            println!("{meta}");
        }
        let entities = chunk_entities_line(chunk);
        if let Some(line) = entities {
            println!("{line}");
        }

        // Make sure every chunk body ends on a line break.
        if chunk.text.ends_with('\n') {
            print!("{}", chunk.text);
        } else {
            println!("{}", chunk.text);
        }
    }
}

/// Produce the `entities=[...]` line shown under a chunk, if entities were
/// loaded for it.
///
/// - `chunk.entities == None`: the caller never asked for entities, so there
///   is no line at all (`None`).
/// - `Some` with an empty list: entities were requested but none were found;
///   an explicit `entities=[]` is rendered so the absence is visible.
/// - Otherwise each entity is rendered as `kind:value`, joined by `, `.
fn chunk_entities_line(chunk: &crate::export::ExportedChunk) -> Option<String> {
    chunk.entities.as_ref().map(|found| {
        // Joining an empty list yields "", which renders as `entities=[]`.
        let rendered = found
            .iter()
            .map(|entity| format!("{}:{}", entity.kind.as_str(), entity.value))
            .collect::<Vec<String>>()
            .join(", ");
        format!("entities=[{rendered}]")
    })
}

/// Write `source` to stdout as pretty-printed JSON followed by a newline.
///
/// # Errors
///
/// Returns the serialization error if `serde_json` cannot encode `source`;
/// nothing is printed in that case.
pub fn print_json(source: &ExportedSource) -> Result<()> {
    let rendered = serde_json::to_string_pretty(source)?;
    println!("{rendered}");
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::{chunk_entities_line, chunk_metadata_line};
    use crate::entities::{ChunkEntity, EntityKind};
    use crate::export::ExportedChunk;
    use crate::search::{ConfidenceBreakdown, FreshnessSource};

    /// Fixture: a chunk with every optional provenance field populated and
    /// the tunable signals taken from the arguments. `entities` starts out
    /// as `None` (not requested).
    fn chunk_with_metadata(
        access_count: i64,
        feedback_score: i64,
        query_success_count: i64,
        confidence: f64,
    ) -> ExportedChunk {
        let confidence_breakdown = ConfidenceBreakdown {
            freshness: 0.5,
            freshness_source: FreshnessSource::LastAccessedAt,
            access_boost: 0.5,
            base: 0.5,
            feedback_factor: 0.0,
            query_success_factor: 0.0,
        };
        ExportedChunk {
            chunk_id: "chunk-1".into(),
            ordinal: 0,
            byte_start: 0,
            byte_end: 12,
            char_count: 12,
            sha256: "abcdef0123456789".into(),
            text: "hello world".into(),
            role: Some("assistant".into()),
            session_id: Some("sess-7".into()),
            turn_id: Some("turn-9".into()),
            parent_turn_id: Some("turn-8".into()),
            tool_name: Some("search".into()),
            tool_call_id: Some("call-1".into()),
            timestamp_unix: Some(1_700_000_003),
            access_count,
            last_accessed_at: Some(1_700_000_500),
            access_decay_at: Some(1_700_000_800),
            feedback_score,
            query_success_count,
            confidence,
            confidence_breakdown,
            entities: None,
        }
    }

    /// Metadata line for a chunk with non-zero access/feedback signals.
    fn warm_meta() -> String {
        chunk_metadata_line(&chunk_with_metadata(7, -2, 5, 0.732)).unwrap()
    }

    /// Metadata line for a chunk whose access/feedback signals are all zero.
    fn cold_meta() -> String {
        chunk_metadata_line(&chunk_with_metadata(0, 0, 0, 0.500)).unwrap()
    }

    /// Assert that every token in `tokens` appears in `meta`.
    #[track_caller]
    fn assert_has_all(meta: &str, tokens: &[&str]) {
        for token in tokens {
            assert!(meta.contains(token), "{meta}");
        }
    }

    /// Assert that no token in `tokens` appears in `meta`.
    #[track_caller]
    fn assert_has_none(meta: &str, tokens: &[&str]) {
        for token in tokens {
            assert!(!meta.contains(token), "{meta}");
        }
    }

    #[test]
    fn chunk_metadata_line_includes_access_and_feedback_signals() {
        let meta = warm_meta();
        assert_has_all(
            &meta,
            &[
                "access_count=7",
                "last_accessed_at=1700000500",
                "access_decay_at=1700000800",
                "parent_turn_id=turn-8",
                "feedback_score=-2",
                "query_success_count=5",
                "confidence=0.732",
            ],
        );
    }

    #[test]
    fn chunk_metadata_line_omits_zero_access_and_feedback() {
        let meta = cold_meta();
        assert_has_none(
            &meta,
            &["access_count=", "feedback_score=", "query_success_count="],
        );
        assert_has_all(
            &meta,
            &[
                "parent_turn_id=turn-8",
                "tool_call_id=call-1",
                "last_accessed_at=1700000500",
                "confidence=0.500",
            ],
        );
    }

    #[test]
    fn chunk_metadata_line_renders_confidence_breakdown_token() {
        // `show` must emit the same compact token the search formatter does,
        // so the two outputs can be compared without switching to JSON.
        let meta = warm_meta();
        let expected = "breakdown=freshness:0.50,freshness_source:last_accessed_at,access_boost:0.50,base:0.50,feedback_factor:0.00,query_success_factor:0.00";
        assert_has_all(&meta, &[expected]);
    }

    #[test]
    fn chunk_metadata_line_emits_breakdown_even_when_other_signals_are_zero() {
        // Zeroed counters hide their own tokens, but the breakdown is still
        // rendered so the freshness component stays visible on cold chunks.
        let meta = cold_meta();
        assert_has_all(
            &meta,
            &[
                "breakdown=freshness:0.50",
                "freshness_source:last_accessed_at",
            ],
        );
    }

    #[test]
    fn chunk_metadata_line_surfaces_explicit_freshness_source_token() {
        // The standalone `freshness_source=<value>` token sits beside the
        // breakdown so the precedence rule can be read at a glance without
        // parsing the compact `breakdown=...` payload.
        let cases = [
            (
                FreshnessSource::LastAccessedAt,
                "freshness_source=last_accessed_at",
            ),
            (
                FreshnessSource::TimestampUnix,
                "freshness_source=timestamp_unix",
            ),
            (FreshnessSource::None, "freshness_source=none"),
        ];

        let mut chunk = chunk_with_metadata(0, 0, 0, 0.500);
        for (freshness_source, token) in cases {
            chunk.confidence_breakdown.freshness_source = freshness_source;
            let meta = chunk_metadata_line(&chunk).unwrap();
            assert!(meta.contains(token), "{meta}");
        }
    }

    #[test]
    fn chunk_metadata_line_surfaces_parent_turn_linkage() {
        let meta = cold_meta();
        assert_has_all(&meta, &["parent_turn_id=turn-8"]);
    }

    #[test]
    fn chunk_entities_line_omits_line_when_not_requested() {
        let chunk = chunk_with_metadata(0, 0, 0, 0.500);
        let line = chunk_entities_line(&chunk);
        assert!(line.is_none());
    }

    #[test]
    fn chunk_entities_line_renders_empty_when_requested_but_none_found() {
        let mut chunk = chunk_with_metadata(0, 0, 0, 0.500);
        chunk.entities = Some(Vec::new());
        let line = chunk_entities_line(&chunk);
        assert_eq!(line.as_deref(), Some("entities=[]"));
    }

    #[test]
    fn chunk_entities_line_renders_kind_value_pairs() {
        let domain = ChunkEntity {
            id: "ent-1".into(),
            kind: EntityKind::Domain,
            value: "example.com".into(),
        };
        let mention = ChunkEntity {
            id: "ent-2".into(),
            kind: EntityKind::Mention,
            value: "alice".into(),
        };

        let mut chunk = chunk_with_metadata(0, 0, 0, 0.500);
        chunk.entities = Some(vec![domain, mention]);

        let line = chunk_entities_line(&chunk);
        assert_eq!(
            line.as_deref(),
            Some("entities=[domain:example.com, mention:alice]")
        );
    }
}