talon-cli 0.4.2

Talon CLI: hybrid retrieval over Obsidian vaults and markdown corpora, with grounded answers, MCP server, and agent-native output.
Documentation
use eyre::{Result, WrapErr as _};
use talon_core::{AskSource, SearchResponse, SearchResult, TalonConfig};

#[derive(Debug)]
struct ChunkSource {
    text: String,
    heading_path: Option<String>,
    chunk_index: u32,
    term_matches: u32,
}

#[derive(Debug)]
struct RankedAskSource {
    source: AskSource,
    chunk_rank: u32,
    result_rank: usize,
    chunk_index: u32,
}

pub(super) fn build_ask_sources(
    search_response: &SearchResponse,
    config: &TalonConfig,
    queries: &[String],
) -> Result<Vec<AskSource>> {
    let terms = query_terms(queries);
    let conn = talon_core::open_database_read_only(&config.db_path)
        .wrap_err_with(|| format!("opening index at {}", config.db_path.display()))?;
    let mut sources = Vec::new();

    for (result_rank, result) in search_response.results.iter().enumerate() {
        let chunks = matching_chunks(&conn, result, &terms)?;
        if chunks.is_empty() {
            sources.push(RankedAskSource {
                source: result_to_source(result),
                chunk_rank: 0,
                result_rank,
                chunk_index: 0,
            });
            continue;
        }
        for chunk in chunks {
            sources.push(RankedAskSource {
                chunk_rank: chunk.term_matches,
                chunk_index: chunk.chunk_index,
                source: chunk_to_source(result, chunk),
                result_rank,
            });
        }
    }

    sort_ranked_sources(&mut sources);
    Ok(sources.into_iter().map(|ranked| ranked.source).collect())
}

fn sort_ranked_sources(sources: &mut [RankedAskSource]) {
    sources.sort_by(|left, right| {
        right
            .source
            .score
            .total_cmp(&left.source.score)
            .then_with(|| right.chunk_rank.cmp(&left.chunk_rank))
            .then_with(|| left.result_rank.cmp(&right.result_rank))
            .then_with(|| left.chunk_index.cmp(&right.chunk_index))
    });
}

fn matching_chunks(
    conn: &talon_core::Connection,
    result: &SearchResult,
    terms: &[String],
) -> Result<Vec<ChunkSource>> {
    let mut stmt = conn
        .prepare(
            "SELECT c.text, c.heading_path, c.chunk_index
             FROM chunks c
             JOIN notes n ON n.id = c.note_id
             WHERE n.vault_path = ? AND n.active = 1
             ORDER BY c.chunk_index",
        )
        .wrap_err("preparing ask chunk query")?;
    let rows = stmt
        .query_map([result.vault_path.as_str()], |row| {
            Ok(ChunkSource {
                text: row.get(0)?,
                heading_path: row.get(1)?,
                chunk_index: row.get(2)?,
                term_matches: 0,
            })
        })
        .wrap_err("querying ask chunks")?;
    let mut chunks = Vec::new();
    for row in rows {
        let mut chunk = row.wrap_err("reading ask chunk")?;
        chunk.term_matches = chunk_score(&chunk, terms);
        if chunk.term_matches > 0 {
            chunks.push(chunk);
        }
    }
    Ok(chunks)
}

fn chunk_to_source(result: &SearchResult, chunk: ChunkSource) -> AskSource {
    let snippet = chunk.heading_path.map_or_else(
        || chunk.text.clone(),
        |heading| format!("{heading}\n{}", chunk.text),
    );
    AskSource {
        vault_path: result.vault_path.clone(),
        title: result.title.clone(),
        snippet,
        score: result.score,
    }
}

fn result_to_source(result: &SearchResult) -> AskSource {
    AskSource {
        vault_path: result.vault_path.clone(),
        title: result.title.clone(),
        snippet: result.snippet.clone(),
        score: result.score,
    }
}

fn chunk_score(chunk: &ChunkSource, terms: &[String]) -> u32 {
    let haystack = format!(
        "{}\n{}",
        chunk.heading_path.as_deref().unwrap_or(""),
        chunk.text
    )
    .to_lowercase();
    terms
        .iter()
        .filter(|term| haystack.contains(term.as_str()))
        .count()
        .try_into()
        .unwrap_or(u32::MAX)
}

fn query_terms(queries: &[String]) -> Vec<String> {
    let mut terms: Vec<String> = queries
        .iter()
        .flat_map(|query| {
            query
                .split(|c: char| !c.is_alphanumeric())
                .map(str::to_lowercase)
                .filter(|term| term.len() >= 3 && !STOP_WORDS.contains(&term.as_str()))
                .collect::<Vec<_>>()
        })
        .collect();
    terms.sort();
    terms.dedup();
    terms
}

const STOP_WORDS: &[&str] = &[
    "about", "and", "are", "for", "from", "how", "into", "like", "notes", "say", "says", "tell",
    "that", "the", "their", "them", "this", "what", "when", "where", "with", "your",
];

#[cfg(test)]
mod tests {
    use super::*;
    use talon_core::VaultPath;

    #[test]
    fn query_terms_drops_common_words() {
        let terms = query_terms(&["what do my notes say about cooking lamb".to_string()]);
        assert_eq!(terms, vec!["cooking".to_string(), "lamb".to_string()]);
    }

    #[test]
    fn chunk_score_matches_heading_and_text() {
        let chunk = ChunkSource {
            text: "Slow braise until tender.".to_string(),
            heading_path: Some("Lamb Neck".to_string()),
            chunk_index: 0,
            term_matches: 0,
        };
        assert_eq!(
            chunk_score(&chunk, &["lamb".to_string(), "braise".to_string()]),
            2
        );
    }

    #[test]
    fn chunk_to_source_prepends_heading() {
        let result = SearchResult {
            vault_path: VaultPath::parse("wiki/Lamb Neck.md")
                .unwrap_or_else(|err| panic!("valid vault path: {err}")),
            title: "Lamb Neck".to_string(),
            snippet: "fallback".to_string(),
            score: 0.8,
            raw_score: None,
            match_kind: talon_core::MatchKind::Semantic,
            scope: None,
            mtime: None,
            is_index: false,
            citations: vec![],
            links: vec![],
            backlinks: vec![],
            tags: vec![],
            aliases: vec![],
            preview_anchors: None,
        };
        let source = chunk_to_source(
            &result,
            ChunkSource {
                text: "Braise gently.".to_string(),
                heading_path: Some("Cook".to_string()),
                chunk_index: 0,
                term_matches: 0,
            },
        );
        assert_eq!(source.snippet, "Cook\nBraise gently.");
    }

    #[test]
    fn ranked_sources_keep_multiple_strong_chunks_ahead_of_weaker_notes() {
        let mut sources = vec![
            ranked_source("weak.md", 0.4, 3, 1, 0),
            ranked_source("strong.md", 0.9, 1, 0, 0),
            ranked_source("strong.md", 0.9, 2, 0, 1),
            ranked_source("middle.md", 0.7, 4, 2, 0),
        ];

        sort_ranked_sources(&mut sources);

        let paths: Vec<&str> = sources
            .iter()
            .map(|ranked| ranked.source.vault_path.as_str())
            .collect();
        assert_eq!(
            paths,
            vec!["strong.md", "strong.md", "middle.md", "weak.md"]
        );
    }

    fn ranked_source(
        path: &str,
        score: f64,
        chunk_rank: u32,
        result_rank: usize,
        chunk_index: u32,
    ) -> RankedAskSource {
        RankedAskSource {
            source: AskSource {
                vault_path: VaultPath::parse(path)
                    .unwrap_or_else(|err| panic!("valid vault path: {err}")),
                title: path.to_string(),
                snippet: path.to_string(),
                score,
            },
            chunk_rank,
            result_rank,
            chunk_index,
        }
    }
}