axiomsync 0.1.6

Core data-processing engine for the AxiomSync local retrieval runtime.
Documentation
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};

use crate::index::ScoredRecord;
use crate::models::{ContextHit, ScoreComponents, TracePoint};

use super::planner::PlannedQuery;

/// Upper bound, counted in `char`s (not bytes), on the length of any preview text produced by
/// `clip_preview` (used for both snippets and matched headings in this module).
const MAX_SNIPPET_CHARS: usize = 240;

/// Builds a [`ContextHit`] for `record` carrying the supplied `score`.
///
/// `query` is tokenized once and the resulting keywords drive both the snippet and the
/// matched-heading selection. `relations` always starts out empty. When `components` is `None`
/// the score breakdown is `ScoreComponents::default()`.
pub(super) fn make_hit(
    record: &crate::models::IndexRecord,
    score: f32,
    query: &str,
    components: Option<&ScoredRecord>,
) -> ContextHit {
    let keywords = tokenize_keywords(query);
    ContextHit {
        uri: record.uri.clone(),
        score,
        abstract_text: record.abstract_text.clone(),
        context_type: record.context_type.clone(),
        relations: Vec::new(),
        snippet: build_snippet(record, &keywords),
        matched_heading: find_matched_heading(record, &keywords),
        score_components: score_components_from_scored(components),
    }
}

/// Splits `query` into lowercase keywords.
///
/// The query is lowercased (Unicode-aware), split on every non-alphanumeric character, and
/// empty pieces are dropped. Duplicates are removed while keeping the position of the first
/// occurrence, so the output order follows the query.
pub(super) fn tokenize_keywords(query: &str) -> Vec<String> {
    let lowered = query.to_lowercase();
    // Borrowed slices are enough for de-duplication; only surviving tokens get allocated.
    let mut seen: HashSet<&str> = HashSet::new();
    lowered
        .split(|c: char| !c.is_alphanumeric())
        .filter(|piece| !piece.is_empty() && seen.insert(*piece))
        .map(str::to_owned)
        .collect()
}

/// Folds `hits` into `acc`, keyed by URI.
///
/// A URI that is not yet present is inserted. For a URI that already exists, the stored hit is
/// replaced only when the incoming hit has a strictly greater score; otherwise the stored hit
/// is kept as-is.
pub(super) fn merge_hits(acc: &mut HashMap<String, ContextHit>, hits: Vec<ContextHit>) {
    for candidate in hits {
        match acc.get_mut(&candidate.uri) {
            Some(current) if candidate.score > current.score => *current = candidate,
            // Existing entry is at least as good (or the comparison is undefined): keep it.
            Some(_) => {}
            None => {
                acc.insert(candidate.uri.clone(), candidate);
            }
        }
    }
}

/// Orders hits by score descending, breaking ties by URI ascending.
///
/// NaN scores are ordered after every non-NaN score (and tie with each other, falling through
/// to the URI tiebreak). Treating NaN as "equal to everything" would make the comparison
/// non-transitive once the URI tiebreak is applied, and `sort_by` requires a total order to
/// produce a well-defined result.
fn compare_hit_score_desc_then_uri_asc(a: &ContextHit, b: &ContextHit) -> Ordering {
    let by_score = match (a.score.is_nan(), b.score.is_nan()) {
        (true, true) => Ordering::Equal,
        // `a` is NaN: it sorts after `b`.
        (true, false) => Ordering::Greater,
        // `b` is NaN: `a` sorts before it.
        (false, true) => Ordering::Less,
        // Neither is NaN, so `partial_cmp` always yields `Some`; reversed operands give
        // descending order.
        (false, false) => b.score.partial_cmp(&a.score).unwrap_or(Ordering::Equal),
    };
    by_score.then_with(|| a.uri.cmp(&b.uri))
}

/// Sorts `hits` in place using `compare_hit_score_desc_then_uri_asc`: highest score first,
/// with equal scores ordered by ascending URI.
pub(super) fn sort_hits_by_score_desc_uri_asc(hits: &mut [ContextHit]) {
    hits.sort_by(compare_hit_score_desc_then_uri_asc);
}

/// Maps a fan-out query priority to the multiplier applied to that query's scores.
///
/// Priorities 0 and 1 keep full weight (`1.0`); 2 maps to `0.82`, 3 to `0.64`, and every
/// higher priority to `0.46`.
pub(super) const fn fanout_priority_weight(priority: u8) -> f32 {
    if priority <= 1 {
        1.0
    } else if priority == 2 {
        0.82
    } else if priority == 3 {
        0.64
    } else {
        0.46
    }
}

pub(super) fn scale_hit_scores(hits: &mut [ContextHit], weight: f32) {
    if weight >= 1.0 {
        return;
    }
    for hit in hits {
        hit.score *= weight;
        hit.score_components.exact *= weight;
        hit.score_components.dense *= weight;
        hit.score_components.sparse *= weight;
        hit.score_components.path *= weight;
        hit.score_components.recency *= weight;
    }
}

/// Folds `points` into `acc`, keeping the highest score seen for each URI.
///
/// A URI that is not yet present is inserted with the point's score; an existing entry is
/// overwritten only by a strictly greater score.
pub(super) fn merge_trace_points(acc: &mut HashMap<String, f32>, points: &[TracePoint]) {
    for point in points {
        match acc.get_mut(&point.uri) {
            Some(best) => {
                if point.score > *best {
                    *best = point.score;
                }
            }
            None => {
                acc.insert(point.uri.clone(), point.score);
            }
        }
    }
}

/// Multiplies the score of every trace point by `weight`.
///
/// A `weight` of `1.0` or more leaves the points untouched, mirroring `scale_hit_scores`.
pub(super) fn scale_trace_point_scores(points: &mut [TracePoint], weight: f32) {
    if weight >= 1.0 {
        return;
    }
    points
        .iter_mut()
        .for_each(|point| point.score *= weight);
}

/// Converts the URI → score map into a list of [`TracePoint`]s sorted by score descending,
/// with ties broken by URI ascending.
///
/// NaN scores are ordered after every non-NaN score. Treating NaN as "equal to everything"
/// would make the comparison non-transitive once the URI tiebreak is applied, and `sort_by`
/// requires a total order to produce a well-defined result.
pub(super) fn sorted_trace_points(points: HashMap<String, f32>) -> Vec<TracePoint> {
    let mut out = points
        .into_iter()
        .map(|(uri, score)| TracePoint { uri, score })
        .collect::<Vec<_>>();
    out.sort_by(|a, b| {
        let by_score = match (a.score.is_nan(), b.score.is_nan()) {
            (true, true) => Ordering::Equal,
            // `a` is NaN: it sorts after `b`.
            (true, false) => Ordering::Greater,
            // `b` is NaN: `a` sorts before it.
            (false, true) => Ordering::Less,
            // Neither is NaN, so `partial_cmp` always yields `Some`; reversed operands give
            // descending order.
            (false, false) => b.score.partial_cmp(&a.score).unwrap_or(Ordering::Equal),
        };
        by_score.then_with(|| a.uri.cmp(&b.uri))
    });
    out
}

/// Converts planner output into `TypedQueryPlan` values, one per planned query and in the
/// same order.
///
/// `kind` and `query` are cloned, `priority` is copied, and each scope is rendered to an owned
/// `String` through its `as_str()` representation.
pub(super) fn typed_query_plans(
    planned_queries: &[PlannedQuery],
) -> Vec<crate::models::TypedQueryPlan> {
    planned_queries
        .iter()
        .map(|plan| crate::models::TypedQueryPlan {
            kind: plan.kind.clone(),
            query: plan.query.clone(),
            scopes: plan
                .scopes
                .iter()
                .map(|scope| scope.as_str().to_string())
                .collect(),
            priority: plan.priority,
        })
        .collect()
}

/// Copies the per-signal scores out of a `ScoredRecord` into a `ScoreComponents`.
///
/// Returns `ScoreComponents::default()` when no scored record is available.
fn score_components_from_scored(scored: Option<&ScoredRecord>) -> ScoreComponents {
    scored.map_or_else(ScoreComponents::default, |record| ScoreComponents {
        exact: record.exact,
        dense: record.dense,
        sparse: record.sparse,
        path: record.path,
        recency: record.recency,
    })
}

/// Picks a short preview for `record`.
///
/// The first non-blank (trimmed) content line that contains any query token wins; with no
/// query tokens, the first non-blank line wins. If no content line qualifies, the trimmed
/// abstract is used instead. Returns `None` when that is empty too. The result is clipped by
/// `clip_preview`.
fn build_snippet(record: &crate::models::IndexRecord, query_tokens: &[String]) -> Option<String> {
    let accept_any_line = query_tokens.is_empty();
    for raw_line in record.content.lines() {
        let line = raw_line.trim();
        if line.is_empty() {
            continue;
        }
        if accept_any_line || line_contains_any_token(line, query_tokens) {
            return Some(clip_preview(line));
        }
    }

    // No usable content line: fall back to the abstract, if there is one.
    let fallback = clip_preview(record.abstract_text.trim());
    (!fallback.is_empty()).then_some(fallback)
}

/// Selects the heading to report for `record`.
///
/// A heading is any trimmed content line starting with `#`, with the leading `#` characters and
/// surrounding whitespace removed; headings that end up empty are ignored. The first heading
/// containing any query token is returned. If none matches (or there are no tokens), the first
/// heading in the content is returned. `None` means the content has no headings. The result is
/// clipped by `clip_preview`.
fn find_matched_heading(
    record: &crate::models::IndexRecord,
    query_tokens: &[String],
) -> Option<String> {
    let has_tokens = !query_tokens.is_empty();
    let headings = record
        .content
        .lines()
        .map(str::trim)
        .filter(|line| line.starts_with('#'))
        .map(|line| line.trim_start_matches('#').trim())
        .filter(|heading| !heading.is_empty());

    let mut fallback: Option<String> = None;
    for heading in headings {
        if has_tokens && line_contains_any_token(heading, query_tokens) {
            return Some(clip_preview(heading));
        }
        // Remember only the very first heading as the no-match fallback.
        fallback = fallback.or_else(|| Some(clip_preview(heading)));
    }
    fallback
}

/// Returns `true` when `line`, compared case-insensitively, contains at least one of
/// `query_tokens` as a substring.
///
/// Tokens are expected to be already lowercased; in this module they come from
/// `tokenize_keywords`, which uses Unicode-aware `to_lowercase`. The line is therefore lowered
/// with the same function. An ASCII-only lowering would leave non-ASCII uppercase letters
/// untouched, so a line such as "ÜBER" would never match the token "über".
///
/// An empty token list yields `false`.
fn line_contains_any_token(line: &str, query_tokens: &[String]) -> bool {
    let lowered = line.to_lowercase();
    query_tokens
        .iter()
        .any(|token| lowered.contains(token.as_str()))
}

/// Returns at most the first `MAX_SNIPPET_CHARS` characters of `raw` as an owned `String`.
///
/// The cut is made on a `char` boundary, so multi-byte characters are never split.
fn clip_preview(raw: &str) -> String {
    // Byte offset of the first character past the limit, or the full length if `raw` is short.
    let end = raw
        .char_indices()
        .nth(MAX_SNIPPET_CHARS)
        .map_or(raw.len(), |(idx, _)| idx);
    // `end` comes from `char_indices` (or is `raw.len()`), so it is always a valid boundary.
    raw[..end].to_owned()
}

// Unit tests for the hit/trace-point helpers defined in this module.
#[cfg(test)]
mod tests {
    use super::{
        fanout_priority_weight, make_hit, merge_hits, scale_hit_scores, scale_trace_point_scores,
        sort_hits_by_score_desc_uri_asc, tokenize_keywords,
    };
    use crate::models::{ContextHit, TracePoint};
    use std::collections::HashMap;

    // Minimal `ContextHit` fixture: only the URI and score vary between tests.
    fn hit(uri: &str, score: f32) -> ContextHit {
        ContextHit {
            uri: uri.to_string(),
            score,
            abstract_text: String::new(),
            context_type: "resource".to_string(),
            relations: Vec::new(),
            snippet: None,
            matched_heading: None,
            score_components: crate::models::ScoreComponents::default(),
        }
    }

    // Pins the exact weight returned for each priority tier, including the catch-all tier.
    #[test]
    fn fanout_priority_weight_profile_is_explicit_and_deterministic() {
        assert_eq!(fanout_priority_weight(1), 1.0);
        assert_eq!(fanout_priority_weight(2), 0.82);
        assert_eq!(fanout_priority_weight(3), 0.64);
        assert_eq!(fanout_priority_weight(4), 0.46);
        assert_eq!(fanout_priority_weight(9), 0.46);
    }

    // A lower-priority query with higher raw scores must not displace the primary query's hit
    // once its scores have been down-weighted.
    #[test]
    fn weighted_merge_keeps_primary_when_secondary_query_is_noisy() {
        let mut merged = HashMap::new();
        merge_hits(&mut merged, vec![hit("axiom://resources/exact.md", 0.72)]);

        let mut noisy_hits = vec![
            hit("axiom://resources/exact.md", 0.79),
            hit("axiom://resources/noise.md", 0.84),
        ];
        // Priority 2 scales by 0.82: 0.79 -> ~0.648 and 0.84 -> ~0.689, both below 0.72.
        scale_hit_scores(&mut noisy_hits, fanout_priority_weight(2));
        merge_hits(&mut merged, noisy_hits);

        let exact = merged
            .get("axiom://resources/exact.md")
            .expect("exact hit")
            .score;
        let noise = merged
            .get("axiom://resources/noise.md")
            .expect("noise hit")
            .score;

        assert!((exact - 0.72).abs() < 0.0001);
        assert!(noise < exact);
    }

    // `make_hit` should surface the matching heading, produce a snippet, and carry over the
    // per-signal score components from the scored record.
    #[test]
    fn make_hit_contains_snippet_and_score_components() {
        let record = crate::models::IndexRecord {
            id: "id-1".to_string(),
            uri: "axiom://resources/docs/api.md".to_string(),
            parent_uri: Some("axiom://resources/docs".to_string()),
            is_leaf: true,
            context_type: "resource".to_string(),
            name: "api.md".to_string(),
            abstract_text: "api guide".to_string(),
            content: "# OAuth Flow\nUse token exchange endpoint.".to_string(),
            tags: vec!["markdown".to_string()],
            updated_at: chrono::Utc::now(),
            depth: 3,
        };
        let scored = crate::index::ScoredRecord {
            uri: std::sync::Arc::from(record.uri.as_str()),
            is_leaf: true,
            depth: 3,
            exact: 0.91,
            dense: 0.52,
            sparse: 0.73,
            recency: 0.40,
            path: 0.17,
            score: 0.88,
        };
        let hit = make_hit(&record, 0.88, "oauth token", Some(&scored));
        assert_eq!(hit.matched_heading.as_deref(), Some("OAuth Flow"));
        assert!(hit.snippet.is_some());
        assert!(hit.score_components.exact > 0.0);
        assert!(hit.score_components.sparse > 0.0);
    }

    // Trace points are scaled with the same weight table as hits: 0.50 * 0.64 = 0.32.
    #[test]
    fn scale_trace_point_scores_applies_same_weight_rule() {
        let mut points = vec![TracePoint {
            uri: "axiom://resources/root".to_string(),
            score: 0.50,
        }];
        scale_trace_point_scores(&mut points, fanout_priority_weight(3));
        assert!((points[0].score - 0.32).abs() < 0.0001);
    }

    // Equal scores must come out in ascending URI order regardless of input order.
    #[test]
    fn hit_sort_is_deterministic_for_equal_scores_via_uri_tiebreak() {
        let mut hits = vec![
            hit("axiom://resources/z.md", 0.70),
            hit("axiom://resources/a.md", 0.70),
            hit("axiom://resources/m.md", 0.90),
        ];
        sort_hits_by_score_desc_uri_asc(&mut hits);
        let uris = hits.iter().map(|x| x.uri.as_str()).collect::<Vec<_>>();
        assert_eq!(
            uris,
            vec![
                "axiom://resources/m.md",
                "axiom://resources/a.md",
                "axiom://resources/z.md"
            ]
        );
    }

    // Tokens are lowercased, split on punctuation/whitespace, and de-duplicated in first-seen
    // order.
    #[test]
    fn tokenize_keywords_normalizes_and_deduplicates_in_order() {
        let tokens = tokenize_keywords("OAuth oauth, token TOKEN refresh");
        assert_eq!(tokens, vec!["oauth", "token", "refresh"]);
    }
}