kg/
output.rs

1use std::collections::{HashMap, HashSet, VecDeque};
2
3use nucleo_matcher::pattern::{CaseMatching, Normalization, Pattern};
4use nucleo_matcher::{Config, Matcher, Utf32Str};
5
6use crate::graph::{Edge, GraphFile, Node, Note};
7use crate::index::Bm25Index;
8
// Tuning constants for find scoring and adaptive rendering.
// NOTE(review): apart from the *_TARGET_CHARS values, none of these are read in
// the part of the file reviewed here; the group descriptions below are inferred
// from the names and should be confirmed against the scoring code.

// BM25 ranking parameters (presumably the conventional k1 / b knobs — confirm).
9const BM25_K1: f64 = 1.5;
10const BM25_B: f64 = 0.75;
// Character budget for adaptive rendering: the default used when the caller
// passes no budget, and the lower/upper bounds `clamp_target_chars` clamps to.
11const DEFAULT_TARGET_CHARS: usize = 1400;
12const MIN_TARGET_CHARS: usize = 300;
13const MAX_TARGET_CHARS: usize = 12_000;
// Fuzzy-mode context limits and per-field weights (presumably — confirm).
14const FUZZY_NEIGHBOR_CONTEXT_CAP: u32 = 220;
15const FUZZY_NO_PRIMARY_CONTEXT_DIVISOR: u32 = 3;
16const FUZZY_DESCRIPTION_WEIGHT: u32 = 2;
17const FUZZY_FACT_WEIGHT: u32 = 2;
18const FUZZY_NOTE_BODY_WEIGHT: u32 = 1;
19const FUZZY_NOTE_TAG_WEIGHT: u32 = 2;
// BM25-mode lexical boosts and per-field weights (presumably — confirm).
20const BM25_PHRASE_MATCH_BOOST: i64 = 120;
21const BM25_TOKEN_MATCH_BOOST: i64 = 45;
22const BM25_ID_WEIGHT: usize = 4;
23const BM25_NAME_WEIGHT: usize = 3;
24const BM25_ALIAS_WEIGHT: usize = 2;
25const BM25_DESCRIPTION_WEIGHT: usize = 2;
26const BM25_FACT_WEIGHT: usize = 2;
27const BM25_NOTE_BODY_WEIGHT: usize = 1;
28const BM25_NOTE_TAG_WEIGHT: usize = 1;
29const BM25_NEIGHBOR_WEIGHT: usize = 1;
// Importance boost and caps on non-relevance score components
// (presumably — confirm where the final score is assembled).
30const IMPORTANCE_NEUTRAL: i64 = 4;
31const IMPORTANCE_STEP_BOOST: i64 = 22;
32const SCORE_META_MAX_RATIO: f64 = 0.35;
33const SCORE_META_MIN_CAP: i64 = 30;
34const SCORE_META_MAX_CAP: i64 = 240;
35
/// Matching strategy requested by callers of the find/render entry points.
///
/// The value is forwarded unchanged to the match-collection helpers; how each
/// variant is scored is decided there, outside the code shown here.
36#[derive(Debug, Clone, Copy)]
37pub enum FindMode {
    /// Fuzzy matching (presumably via `nucleo_matcher`, which this file imports — confirm).
38    Fuzzy,
    /// BM25 ranking (presumably backed by `Bm25Index` when one is supplied — confirm).
39    Bm25,
40}
41
/// A node borrowed from the graph together with its score and score breakdown.
///
/// The renderers print `score` on a `score:` line and, in debug mode, the
/// `breakdown` fields on a `score_debug:` line.
42#[derive(Clone, Copy)]
43struct ScoredNode<'a> {
    // Score printed for this match.
44    score: i64,
    // The matched node, borrowed for the lifetime `'a`.
45    node: &'a Node,
    // Component values shown by `render_score_debug_line`.
46    breakdown: ScoreBreakdown,
47}
48
/// Internal per-match score components.
///
/// The fields mirror `ScoreBreakdownResult` one-for-one and are printed, in
/// this order, by `render_score_debug_line`.
/// NOTE(review): how the components combine into the final score is not
/// visible in this part of the file.
49#[derive(Debug, Clone, Copy)]
50struct ScoreBreakdown {
51    raw_relevance: f64,
52    normalized_relevance: i64,
53    lexical_boost: i64,
54    feedback_boost: i64,
55    importance_boost: i64,
56    authority_raw: i64,
57    authority_applied: i64,
58    authority_cap: i64,
59}
60
/// A node paired with a raw relevance value and a lexical boost.
///
/// NOTE(review): not referenced in the part of the file reviewed here;
/// presumably an intermediate produced before scores are normalized — confirm
/// against the matching code.
61struct RawCandidate<'a> {
62    node: &'a Node,
63    raw_relevance: f64,
64    lexical_boost: i64,
65}
66
67struct FindQueryContext<'a> {
68    notes_by_node: HashMap<&'a str, Vec<&'a Note>>,
69    neighbors_by_node: HashMap<&'a str, Vec<&'a Node>>,
70}
71
72impl<'a> FindQueryContext<'a> {
73    fn build(graph: &'a GraphFile) -> Self {
74        let node_by_id: HashMap<&'a str, &'a Node> = graph
75            .nodes
76            .iter()
77            .map(|node| (node.id.as_str(), node))
78            .collect();
79
80        let mut notes_by_node: HashMap<&'a str, Vec<&'a Note>> = HashMap::new();
81        for note in &graph.notes {
82            notes_by_node
83                .entry(note.node_id.as_str())
84                .or_default()
85                .push(note);
86        }
87
88        let mut neighbors_by_node: HashMap<&'a str, Vec<&'a Node>> = HashMap::new();
89        for edge in &graph.edges {
90            if let (Some(source), Some(target)) = (
91                node_by_id.get(edge.source_id.as_str()),
92                node_by_id.get(edge.target_id.as_str()),
93            ) {
94                neighbors_by_node
95                    .entry(source.id.as_str())
96                    .or_default()
97                    .push(*target);
98                neighbors_by_node
99                    .entry(target.id.as_str())
100                    .or_default()
101                    .push(*source);
102            }
103        }
104
105        for neighbors in neighbors_by_node.values_mut() {
106            neighbors.sort_by(|left, right| left.id.cmp(&right.id));
107            neighbors.dedup_by(|left, right| left.id == right.id);
108        }
109
110        Self {
111            notes_by_node,
112            neighbors_by_node,
113        }
114    }
115
116    fn notes_for(&self, node_id: &str) -> &[&'a Note] {
117        self.notes_by_node
118            .get(node_id)
119            .map(Vec::as_slice)
120            .unwrap_or(&[])
121    }
122
123    fn neighbors_for(&self, node_id: &str) -> &[&'a Node] {
124        self.neighbors_by_node
125            .get(node_id)
126            .map(Vec::as_slice)
127            .unwrap_or(&[])
128    }
129}
130
/// Public, owned copy of a match's score components.
///
/// `find_scored_nodes_and_total_with_index` fills it field-for-field from the
/// internal `ScoreBreakdown`.
131#[derive(Debug, Clone)]
132pub struct ScoreBreakdownResult {
133    pub raw_relevance: f64,
134    pub normalized_relevance: i64,
135    pub lexical_boost: i64,
136    pub feedback_boost: i64,
137    pub importance_boost: i64,
138    pub authority_raw: i64,
139    pub authority_applied: i64,
140    pub authority_cap: i64,
141}
142
/// Public, owned search result: a cloned node, its score and its score
/// breakdown, as returned by `find_scored_nodes_and_total_with_index`.
143#[derive(Debug, Clone)]
144pub struct ScoredNodeResult {
    /// Score of the match.
145    pub score: i64,
    /// Owned clone of the matched node.
146    pub node: Node,
    /// Components reported alongside the score.
147    pub breakdown: ScoreBreakdownResult,
148}
149
150pub fn render_find(
151    graph: &GraphFile,
152    queries: &[String],
153    limit: usize,
154    include_features: bool,
155    mode: FindMode,
156    full: bool,
157) -> String {
158    render_find_with_index(
159        graph,
160        queries,
161        limit,
162        include_features,
163        mode,
164        full,
165        false,
166        None,
167    )
168}
169
170pub fn render_find_with_index(
171    graph: &GraphFile,
172    queries: &[String],
173    limit: usize,
174    include_features: bool,
175    mode: FindMode,
176    full: bool,
177    debug_score: bool,
178    index: Option<&Bm25Index>,
179) -> String {
180    let mut sections = Vec::new();
181    for query in queries {
182        let matches = find_all_matches_with_index(graph, query, include_features, mode, index);
183        let total = matches.len();
184        let visible: Vec<_> = matches.into_iter().take(limit).collect();
185        let shown = visible.len();
186        let mut lines = vec![render_result_header(query, shown, total)];
187        for scored in visible {
188            lines.push(render_scored_node_block(graph, &scored, full, debug_score));
189        }
190        push_limit_omission_line(&mut lines, shown, total);
191        sections.push(lines.join("\n"));
192    }
193    format!("{}\n", sections.join("\n\n"))
194}
195
196pub fn find_nodes(
197    graph: &GraphFile,
198    query: &str,
199    limit: usize,
200    include_features: bool,
201    mode: FindMode,
202) -> Vec<Node> {
203    find_matches_with_index(graph, query, limit, include_features, mode, None)
204        .into_iter()
205        .map(|item| item.node.clone())
206        .collect()
207}
208
209pub fn find_nodes_with_index(
210    graph: &GraphFile,
211    query: &str,
212    limit: usize,
213    include_features: bool,
214    mode: FindMode,
215    index: Option<&Bm25Index>,
216) -> Vec<Node> {
217    find_matches_with_index(graph, query, limit, include_features, mode, index)
218        .into_iter()
219        .map(|item| item.node.clone())
220        .collect()
221}
222
223pub fn find_nodes_and_total_with_index(
224    graph: &GraphFile,
225    query: &str,
226    limit: usize,
227    include_features: bool,
228    mode: FindMode,
229    index: Option<&Bm25Index>,
230) -> (usize, Vec<Node>) {
231    let matches = find_all_matches_with_index(graph, query, include_features, mode, index);
232    let total = matches.len();
233    let nodes = matches
234        .into_iter()
235        .take(limit)
236        .map(|item| item.node.clone())
237        .collect();
238    (total, nodes)
239}
240
241pub fn find_scored_nodes_and_total_with_index(
242    graph: &GraphFile,
243    query: &str,
244    limit: usize,
245    include_features: bool,
246    mode: FindMode,
247    index: Option<&Bm25Index>,
248) -> (usize, Vec<ScoredNodeResult>) {
249    let matches = find_all_matches_with_index(graph, query, include_features, mode, index);
250    let total = matches.len();
251    let nodes = matches
252        .into_iter()
253        .take(limit)
254        .map(|item| ScoredNodeResult {
255            score: item.score,
256            node: item.node.clone(),
257            breakdown: ScoreBreakdownResult {
258                raw_relevance: item.breakdown.raw_relevance,
259                normalized_relevance: item.breakdown.normalized_relevance,
260                lexical_boost: item.breakdown.lexical_boost,
261                feedback_boost: item.breakdown.feedback_boost,
262                importance_boost: item.breakdown.importance_boost,
263                authority_raw: item.breakdown.authority_raw,
264                authority_applied: item.breakdown.authority_applied,
265                authority_cap: item.breakdown.authority_cap,
266            },
267        })
268        .collect();
269    (total, nodes)
270}
271
272pub fn count_find_results(
273    graph: &GraphFile,
274    queries: &[String],
275    limit: usize,
276    include_features: bool,
277    mode: FindMode,
278) -> usize {
279    count_find_results_with_index(graph, queries, limit, include_features, mode, None)
280}
281
282pub fn count_find_results_with_index(
283    graph: &GraphFile,
284    queries: &[String],
285    _limit: usize,
286    include_features: bool,
287    mode: FindMode,
288    index: Option<&Bm25Index>,
289) -> usize {
290    let mut total = 0;
291    for query in queries {
292        total += find_all_matches_with_index(graph, query, include_features, mode, index).len();
293    }
294    total
295}
296
297pub fn render_node(graph: &GraphFile, node: &Node, full: bool) -> String {
298    format!("{}\n", render_node_block(graph, node, full))
299}
300
301pub fn render_node_adaptive(graph: &GraphFile, node: &Node, target_chars: Option<usize>) -> String {
302    let target = clamp_target_chars(target_chars);
303    let full = format!("{}\n", render_node_block(graph, node, true));
304    if fits_target_chars(&full, target) {
305        return full;
306    }
307    let mut candidates = Vec::new();
308    for (depth, detail, edge_cap) in [
309        (0usize, DetailLevel::Rich, 8usize),
310        (1usize, DetailLevel::Rich, 8usize),
311        (2usize, DetailLevel::Rich, 6usize),
312        (2usize, DetailLevel::Compact, 6usize),
313        (2usize, DetailLevel::Minimal, 2usize),
314    ] {
315        let rendered = render_single_node_candidate(graph, node, depth, detail, edge_cap);
316        candidates.push(Candidate {
317            rendered,
318            depth,
319            detail,
320            shown_nodes: 1 + depth,
321        });
322    }
323    pick_best_candidate(candidates, target)
324}
325
326pub fn render_find_adaptive_with_index(
327    graph: &GraphFile,
328    queries: &[String],
329    limit: usize,
330    include_features: bool,
331    mode: FindMode,
332    target_chars: Option<usize>,
333    debug_score: bool,
334    index: Option<&Bm25Index>,
335) -> String {
336    let target = clamp_target_chars(target_chars);
337    let mut sections = Vec::new();
338    for query in queries {
339        let matches = find_all_matches_with_index(graph, query, include_features, mode, index);
340        let total = matches.len();
341        let visible: Vec<_> = matches.into_iter().take(limit).collect();
342        let section = if visible.len() == 1 {
343            render_single_result_section(graph, query, &visible[0], total, target, debug_score)
344        } else {
345            render_multi_result_section(graph, query, &visible, total, target, debug_score)
346        };
347        sections.push(section);
348    }
349    format!("{}\n", sections.join("\n\n"))
350}
351
/// How much of a node's content a rendering includes, from most to least.
#[derive(Clone, Copy)]
enum DetailLevel {
    Rich,
    Compact,
    Minimal,
}

/// One rendering attempt together with the attributes used to rank it.
struct Candidate {
    /// The rendered text.
    rendered: String,
    /// Neighbor depth used for this rendering.
    depth: usize,
    /// Detail level used for this rendering.
    detail: DetailLevel,
    /// Number of nodes the rendering is credited with showing.
    shown_nodes: usize,
}

impl DetailLevel {
    /// Utility points a candidate earns for its detail level; richer detail
    /// earns more.
    fn utility_bonus(self) -> usize {
        match self {
            Self::Minimal => 0,
            Self::Compact => 10,
            Self::Rich => 20,
        }
    }
}
375
376fn clamp_target_chars(target_chars: Option<usize>) -> usize {
377    target_chars
378        .unwrap_or(DEFAULT_TARGET_CHARS)
379        .clamp(MIN_TARGET_CHARS, MAX_TARGET_CHARS)
380}
381
382fn render_single_result_section(
383    graph: &GraphFile,
384    query: &str,
385    node: &ScoredNode<'_>,
386    total_available: usize,
387    target: usize,
388    debug_score: bool,
389) -> String {
390    let header = render_result_header(query, 1, total_available);
391    let full = render_single_result_candidate(
392        graph,
393        &header,
394        node,
395        total_available,
396        0,
397        DetailLevel::Rich,
398        8,
399        true,
400        debug_score,
401    );
402    if fits_target_chars(&full, target) {
403        return full.trim_end().to_owned();
404    }
405    let mut candidates = Vec::new();
406    for (depth, detail, edge_cap) in [
407        (0usize, DetailLevel::Rich, 8usize),
408        (1usize, DetailLevel::Rich, 8usize),
409        (2usize, DetailLevel::Rich, 6usize),
410        (2usize, DetailLevel::Compact, 6usize),
411        (2usize, DetailLevel::Minimal, 2usize),
412    ] {
413        candidates.push(Candidate {
414            rendered: render_single_result_candidate(
415                graph,
416                &header,
417                node,
418                total_available,
419                depth,
420                detail,
421                edge_cap,
422                false,
423                debug_score,
424            ),
425            depth,
426            detail,
427            shown_nodes: 1 + depth,
428        });
429    }
430    pick_best_candidate(candidates, target)
431        .trim_end()
432        .to_owned()
433}
434
435fn render_multi_result_section(
436    graph: &GraphFile,
437    query: &str,
438    nodes: &[ScoredNode<'_>],
439    total_available: usize,
440    target: usize,
441    debug_score: bool,
442) -> String {
443    let visible_total = nodes.len();
444    let full = render_full_result_section(graph, query, nodes, total_available, debug_score);
445    if fits_target_chars(&full, target) {
446        return full;
447    }
448    let mut candidates = Vec::new();
449    let full_cap = visible_total;
450    let mid_cap = full_cap.min(5);
451    let low_cap = full_cap.min(3);
452
453    for (detail, edge_cap, result_cap, depth) in [
454        (DetailLevel::Rich, 4usize, full_cap.min(4), 0usize),
455        (DetailLevel::Compact, 3usize, full_cap, 0usize),
456        (DetailLevel::Rich, 2usize, mid_cap, 1usize),
457        (DetailLevel::Compact, 1usize, full_cap, 0usize),
458        (DetailLevel::Minimal, 1usize, mid_cap, 0usize),
459        (DetailLevel::Minimal, 0usize, low_cap, 0usize),
460        (DetailLevel::Minimal, 0usize, low_cap.min(2), 1usize),
461    ] {
462        let shown = result_cap.min(nodes.len());
463        let mut lines = vec![render_result_header(query, shown, total_available)];
464        for node in nodes.iter().take(shown) {
465            lines.extend(render_scored_node_candidate_lines(
466                graph,
467                node,
468                0,
469                detail,
470                edge_cap,
471                debug_score,
472            ));
473            if depth > 0 {
474                lines.extend(render_neighbor_layers(graph, node.node, depth, detail));
475            }
476        }
477        if visible_total > shown {
478            lines.push(format!("... +{} more nodes omitted", visible_total - shown));
479        }
480        push_limit_omission_line(&mut lines, visible_total, total_available);
481        candidates.push(Candidate {
482            rendered: format!("{}\n", lines.join("\n")),
483            depth,
484            detail,
485            shown_nodes: shown,
486        });
487    }
488
489    pick_best_candidate(candidates, target)
490        .trim_end()
491        .to_owned()
492}
493
494fn pick_best_candidate(candidates: Vec<Candidate>, target: usize) -> String {
495    let lower = (target as f64 * 0.7) as usize;
496    let mut best: Option<(usize, usize, usize, usize, String)> = None;
497
498    for candidate in candidates {
499        let chars = candidate.rendered.chars().count();
500        let overshoot = chars.saturating_sub(target);
501        let undershoot = lower.saturating_sub(chars);
502        let penalty = overshoot.saturating_mul(10).saturating_add(undershoot);
503        let utility = candidate
504            .depth
505            .saturating_mul(100)
506            .saturating_add(candidate.shown_nodes.saturating_mul(5))
507            .saturating_add(candidate.detail.utility_bonus());
508
509        let entry = (
510            penalty,
511            overshoot,
512            usize::MAX - utility,
513            usize::MAX - chars,
514            candidate.rendered,
515        );
516        if best.as_ref().is_none_or(|current| {
517            entry.0 < current.0
518                || (entry.0 == current.0 && entry.1 < current.1)
519                || (entry.0 == current.0 && entry.1 == current.1 && entry.2 < current.2)
520                || (entry.0 == current.0
521                    && entry.1 == current.1
522                    && entry.2 == current.2
523                    && entry.3 < current.3)
524        }) {
525            best = Some(entry);
526        }
527    }
528
529    best.map(|item| item.4).unwrap_or_else(|| "\n".to_owned())
530}
531
532fn render_full_result_section(
533    graph: &GraphFile,
534    query: &str,
535    nodes: &[ScoredNode<'_>],
536    total_available: usize,
537    debug_score: bool,
538) -> String {
539    let mut lines = vec![render_result_header(query, nodes.len(), total_available)];
540    for node in nodes {
541        lines.push(render_scored_node_block(graph, node, true, debug_score));
542    }
543    push_limit_omission_line(&mut lines, nodes.len(), total_available);
544    lines.join("\n")
545}
546
547fn render_result_header(query: &str, shown: usize, total: usize) -> String {
548    let query = escape_cli_text(query);
549    if shown < total {
550        format!("? {query} ({shown}/{total})")
551    } else {
552        format!("? {query} ({total})")
553    }
554}
555
/// Appends a line stating how many results were left out by the limit.
/// Nothing is appended when `shown` is at least `total`.
fn push_limit_omission_line(lines: &mut Vec<String>, shown: usize, total: usize) {
    if total <= shown {
        return;
    }
    let omitted = total - shown;
    lines.push(format!("... {omitted} more nodes omitted by limit"));
}
562
/// Reports whether `rendered` is at most `target` characters long, counting
/// Unicode scalar values rather than bytes.
fn fits_target_chars(rendered: &str, target: usize) -> bool {
    // The text fits exactly when it has no character at index `target`.
    rendered.chars().nth(target).is_none()
}
566
567fn render_single_node_candidate(
568    graph: &GraphFile,
569    node: &Node,
570    depth: usize,
571    detail: DetailLevel,
572    edge_cap: usize,
573) -> String {
574    let lines = render_single_node_candidate_lines(graph, node, depth, detail, edge_cap);
575    format!("{}\n", lines.join("\n"))
576}
577
578fn render_single_result_candidate(
579    graph: &GraphFile,
580    header: &str,
581    node: &ScoredNode<'_>,
582    total_available: usize,
583    depth: usize,
584    detail: DetailLevel,
585    edge_cap: usize,
586    full: bool,
587    debug_score: bool,
588) -> String {
589    let mut lines = vec![header.to_owned()];
590    if full {
591        lines.push(render_scored_node_block(graph, node, true, debug_score));
592    } else {
593        lines.extend(render_scored_node_candidate_lines(
594            graph,
595            node,
596            depth,
597            detail,
598            edge_cap,
599            debug_score,
600        ));
601    }
602    push_limit_omission_line(&mut lines, 1, total_available);
603    format!("{}\n", lines.join("\n"))
604}
605
606fn render_single_node_candidate_lines(
607    graph: &GraphFile,
608    node: &Node,
609    depth: usize,
610    detail: DetailLevel,
611    edge_cap: usize,
612) -> Vec<String> {
613    let mut lines = render_node_lines_with_edges(graph, node, detail, edge_cap);
614    if depth > 0 {
615        lines.extend(render_neighbor_layers(graph, node, depth, detail));
616    }
617    lines
618}
619
620fn render_scored_node_candidate_lines(
621    graph: &GraphFile,
622    node: &ScoredNode<'_>,
623    depth: usize,
624    detail: DetailLevel,
625    edge_cap: usize,
626    debug_score: bool,
627) -> Vec<String> {
628    let mut lines = vec![format!("score: {}", node.score)];
629    if debug_score {
630        lines.push(render_score_debug_line(node));
631    }
632    lines.extend(render_single_node_candidate_lines(
633        graph, node.node, depth, detail, edge_cap,
634    ));
635    lines
636}
637
638fn render_scored_node_block(
639    graph: &GraphFile,
640    node: &ScoredNode<'_>,
641    full: bool,
642    debug_score: bool,
643) -> String {
644    if debug_score {
645        format!(
646            "score: {}\n{}\n{}",
647            node.score,
648            render_score_debug_line(node),
649            render_node_block(graph, node.node, full)
650        )
651    } else {
652        format!(
653            "score: {}\n{}",
654            node.score,
655            render_node_block(graph, node.node, full)
656        )
657    }
658}
659
660fn render_score_debug_line(node: &ScoredNode<'_>) -> String {
661    format!(
662        "score_debug: raw_relevance={:.3} normalized_relevance={} lexical_boost={} feedback_boost={} importance_boost={} authority_raw={} authority_applied={} authority_cap={}",
663        node.breakdown.raw_relevance,
664        node.breakdown.normalized_relevance,
665        node.breakdown.lexical_boost,
666        node.breakdown.feedback_boost,
667        node.breakdown.importance_boost,
668        node.breakdown.authority_raw,
669        node.breakdown.authority_applied,
670        node.breakdown.authority_cap,
671    )
672}
673
674fn render_neighbor_layers(
675    graph: &GraphFile,
676    root: &Node,
677    max_depth: usize,
678    detail: DetailLevel,
679) -> Vec<String> {
680    let mut out = Vec::new();
681    let mut seen: HashSet<String> = HashSet::from([root.id.clone()]);
682    let mut queue: VecDeque<(String, usize)> = VecDeque::from([(root.id.clone(), 0usize)]);
683    let mut layers: Vec<Vec<&Node>> = vec![Vec::new(); max_depth + 1];
684
685    while let Some((node_id, depth)) = queue.pop_front() {
686        if depth >= max_depth {
687            continue;
688        }
689        for incident in incident_edges(graph, &node_id) {
690            if seen.insert(incident.related.id.clone()) {
691                let next_depth = depth + 1;
692                if next_depth <= max_depth {
693                    layers[next_depth].push(incident.related);
694                    queue.push_back((incident.related.id.clone(), next_depth));
695                }
696            }
697        }
698    }
699
700    for depth in 1..=max_depth {
701        if layers[depth].is_empty() {
702            continue;
703        }
704        let cap = match detail {
705            DetailLevel::Rich => 6,
706            DetailLevel::Compact => 4,
707            DetailLevel::Minimal => 3,
708        };
709        let shown = layers[depth].len().min(cap);
710        out.push(format!(
711            "depth {depth}: {shown}/{} neighbors",
712            layers[depth].len()
713        ));
714        for node in layers[depth].iter().take(shown) {
715            out.extend(render_node_identity_lines(node, detail));
716        }
717        if layers[depth].len() > shown {
718            out.push(format!(
719                "... +{} more neighbors omitted",
720                layers[depth].len() - shown
721            ));
722        }
723    }
724
725    out
726}
727
728fn render_node_lines_with_edges(
729    graph: &GraphFile,
730    node: &Node,
731    detail: DetailLevel,
732    edge_cap: usize,
733) -> Vec<String> {
734    let mut lines = render_node_identity_lines(node, detail);
735    lines.extend(render_node_link_lines(graph, node, edge_cap));
736    lines
737}
738
739fn render_node_identity_lines(node: &Node, detail: DetailLevel) -> Vec<String> {
740    let mut lines = Vec::new();
741    match detail {
742        DetailLevel::Rich => {
743            lines.push(format!(
744                "# {} | {} [{}]",
745                node.id,
746                escape_cli_text(&node.name),
747                node.r#type
748            ));
749            if !node.properties.alias.is_empty() {
750                lines.push(format!(
751                    "aka: {}",
752                    node.properties
753                        .alias
754                        .iter()
755                        .map(|alias| escape_cli_text(alias))
756                        .collect::<Vec<_>>()
757                        .join(", ")
758                ));
759            }
760            push_description_line(&mut lines, &node.properties.description, None);
761            let shown_facts = node.properties.key_facts.len().min(3);
762            for fact in node.properties.key_facts.iter().take(shown_facts) {
763                lines.push(format!("- {}", escape_cli_text(fact)));
764            }
765            let omitted = node.properties.key_facts.len().saturating_sub(shown_facts);
766            if omitted > 0 {
767                lines.push(format!("... {omitted} more facts omitted"));
768            }
769        }
770        DetailLevel::Compact => {
771            lines.push(format!(
772                "# {} | {} [{}]",
773                node.id,
774                escape_cli_text(&node.name),
775                node.r#type
776            ));
777            push_description_line(&mut lines, &node.properties.description, Some(140));
778            if let Some(fact) = node.properties.key_facts.first() {
779                lines.push(format!("- {}", escape_cli_text(fact)));
780            }
781        }
782        DetailLevel::Minimal => {
783            lines.push(format!(
784                "# {} | {} [{}]",
785                node.id,
786                escape_cli_text(&node.name),
787                node.r#type
788            ));
789        }
790    }
791    lines
792}
793
794fn render_node_link_lines(graph: &GraphFile, node: &Node, edge_cap: usize) -> Vec<String> {
795    let incident = incident_edges(graph, &node.id);
796    if incident.is_empty() {
797        return Vec::new();
798    }
799
800    let mut lines = Vec::new();
801    if incident.len() > 12 {
802        lines.push(format!("links: {} total", incident.len()));
803        let (out_summary, in_summary) = summarize_relations(&incident);
804        if !out_summary.is_empty() {
805            lines.push(format!("out: {out_summary}"));
806        }
807        if !in_summary.is_empty() {
808            lines.push(format!("in: {in_summary}"));
809        }
810    }
811
812    let shown = incident.len().min(edge_cap);
813    for edge in incident.into_iter().take(shown) {
814        let prefix = if edge.incoming { "<-" } else { "->" };
815        lines.extend(render_edge_lines(prefix, edge.edge, edge.related, false));
816    }
817    if edge_cap > 0 && incident_count(graph, &node.id) > shown {
818        lines.push(format!(
819            "... {} more links omitted",
820            incident_count(graph, &node.id) - shown
821        ));
822    }
823    lines
824}
825
826fn incident_count(graph: &GraphFile, node_id: &str) -> usize {
827    graph
828        .edges
829        .iter()
830        .filter(|edge| edge.source_id == node_id || edge.target_id == node_id)
831        .count()
832}
833
/// An edge touching a particular node, seen from that node's side.
/// Built by `incident_edges`.
834struct IncidentEdge<'a> {
    // The edge itself.
835    edge: &'a Edge,
    // The node at the other end of the edge.
836    related: &'a Node,
    // True when the queried node is the edge's target, false when it is the source.
837    incoming: bool,
838}
839
840fn incident_edges<'a>(graph: &'a GraphFile, node_id: &str) -> Vec<IncidentEdge<'a>> {
841    let mut edges = Vec::new();
842    for edge in &graph.edges {
843        if edge.source_id == node_id {
844            if let Some(related) = graph.node_by_id(&edge.target_id) {
845                edges.push(IncidentEdge {
846                    edge,
847                    related,
848                    incoming: false,
849                });
850            }
851        } else if edge.target_id == node_id {
852            if let Some(related) = graph.node_by_id(&edge.source_id) {
853                edges.push(IncidentEdge {
854                    edge,
855                    related,
856                    incoming: true,
857                });
858            }
859        }
860    }
861    edges.sort_by(|left, right| {
862        right
863            .related
864            .properties
865            .importance
866            .cmp(&left.related.properties.importance)
867            .then_with(|| left.edge.relation.cmp(&right.edge.relation))
868            .then_with(|| left.related.id.cmp(&right.related.id))
869    });
870    edges
871}
872
873fn summarize_relations(edges: &[IncidentEdge<'_>]) -> (String, String) {
874    let mut out: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
875    let mut incoming: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
876
877    for edge in edges {
878        let bucket = if edge.incoming {
879            &mut incoming
880        } else {
881            &mut out
882        };
883        *bucket.entry(edge.edge.relation.clone()).or_insert(0) += 1;
884    }
885
886    (join_relation_counts(&out), join_relation_counts(&incoming))
887}
888
/// Formats the first three entries of `counts` (in key order) as
/// `relation xN`, separated by `", "`. An empty map gives an empty string.
fn join_relation_counts(counts: &std::collections::BTreeMap<String, usize>) -> String {
    let mut joined = String::new();
    for (relation, count) in counts.iter().take(3) {
        if !joined.is_empty() {
            joined.push_str(", ");
        }
        joined.push_str(&format!("{relation} x{count}"));
    }
    joined
}
897
898fn render_node_block(graph: &GraphFile, node: &Node, full: bool) -> String {
899    let mut lines = Vec::new();
900    lines.push(format!(
901        "# {} | {} [{}]",
902        node.id,
903        escape_cli_text(&node.name),
904        node.r#type
905    ));
906
907    if !node.properties.alias.is_empty() {
908        lines.push(format!(
909            "aka: {}",
910            node.properties
911                .alias
912                .iter()
913                .map(|alias| escape_cli_text(alias))
914                .collect::<Vec<_>>()
915                .join(", ")
916        ));
917    }
918    push_description_line(
919        &mut lines,
920        &node.properties.description,
921        if full { None } else { Some(200) },
922    );
923    if full {
924        if !node.properties.domain_area.is_empty() {
925            lines.push(format!(
926                "domain_area: {}",
927                escape_cli_text(&node.properties.domain_area)
928            ));
929        }
930        if !node.properties.provenance.is_empty() {
931            lines.push(format!(
932                "provenance: {}",
933                escape_cli_text(&node.properties.provenance)
934            ));
935        }
936        if let Some(confidence) = node.properties.confidence {
937            lines.push(format!("confidence: {confidence}"));
938        }
939        lines.push(format!("importance: {}", node.properties.importance));
940        if !node.properties.created_at.is_empty() {
941            lines.push(format!("created_at: {}", node.properties.created_at));
942        }
943    }
944
945    let facts_to_show = if full {
946        node.properties.key_facts.len()
947    } else {
948        node.properties.key_facts.len().min(2)
949    };
950    for fact in node.properties.key_facts.iter().take(facts_to_show) {
951        lines.push(format!("- {}", escape_cli_text(fact)));
952    }
953    let omitted = node
954        .properties
955        .key_facts
956        .len()
957        .saturating_sub(facts_to_show);
958    if omitted > 0 {
959        lines.push(format!("... {omitted} more facts omitted"));
960    }
961
962    if full {
963        if !node.source_files.is_empty() {
964            lines.push(format!(
965                "sources: {}",
966                node.source_files
967                    .iter()
968                    .map(|source| escape_cli_text(source))
969                    .collect::<Vec<_>>()
970                    .join(", ")
971            ));
972        }
973        push_feedback_lines(
974            &mut lines,
975            node.properties.feedback_score,
976            node.properties.feedback_count,
977            node.properties.feedback_last_ts_ms,
978            None,
979        );
980    }
981
982    let attached_notes: Vec<_> = graph
983        .notes
984        .iter()
985        .filter(|note| note.node_id == node.id)
986        .collect();
987    if full && !attached_notes.is_empty() {
988        lines.push(format!("notes: {}", attached_notes.len()));
989        for note in attached_notes {
990            lines.extend(render_attached_note_lines(note));
991        }
992    }
993
994    for edge in outgoing_edges(graph, &node.id, full) {
995        if let Some(target) = graph.node_by_id(&edge.target_id) {
996            lines.extend(render_edge_lines("->", edge, target, full));
997        }
998    }
999    for edge in incoming_edges(graph, &node.id, full) {
1000        if let Some(source) = graph.node_by_id(&edge.source_id) {
1001            lines.extend(render_edge_lines("<-", edge, source, full));
1002        }
1003    }
1004
1005    lines.join("\n")
1006}
1007
1008fn outgoing_edges<'a>(graph: &'a GraphFile, node_id: &str, full: bool) -> Vec<&'a Edge> {
1009    let mut edges: Vec<&Edge> = graph
1010        .edges
1011        .iter()
1012        .filter(|edge| edge.source_id == node_id)
1013        .collect();
1014    edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1015    if !full {
1016        edges.truncate(3);
1017    }
1018    edges
1019}
1020
1021fn incoming_edges<'a>(graph: &'a GraphFile, node_id: &str, full: bool) -> Vec<&'a Edge> {
1022    let mut edges: Vec<&Edge> = graph
1023        .edges
1024        .iter()
1025        .filter(|edge| edge.target_id == node_id)
1026        .collect();
1027    edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1028    if !full {
1029        edges.truncate(3);
1030    }
1031    edges
1032}
1033
1034fn render_edge_lines(prefix: &str, edge: &Edge, related: &Node, full: bool) -> Vec<String> {
1035    let (arrow, relation) = if edge.relation.starts_with("NOT_") {
1036        (
1037            format!("{prefix}!"),
1038            edge.relation.trim_start_matches("NOT_"),
1039        )
1040    } else {
1041        (prefix.to_owned(), edge.relation.as_str())
1042    };
1043
1044    let mut line = format!(
1045        "{arrow} {relation} | {} | {}",
1046        related.id,
1047        escape_cli_text(&related.name)
1048    );
1049    if !edge.properties.detail.is_empty() {
1050        line.push_str(" | ");
1051        let detail = escape_cli_text(&edge.properties.detail);
1052        if full {
1053            line.push_str(&detail);
1054        } else {
1055            line.push_str(&truncate(&detail, 80));
1056        }
1057    }
1058    let mut lines = vec![line];
1059    if full {
1060        push_feedback_lines(
1061            &mut lines,
1062            edge.properties.feedback_score,
1063            edge.properties.feedback_count,
1064            edge.properties.feedback_last_ts_ms,
1065            Some("edge_"),
1066        );
1067        if !edge.properties.valid_from.is_empty() {
1068            lines.push(format!("edge_valid_from: {}", edge.properties.valid_from));
1069        }
1070        if !edge.properties.valid_to.is_empty() {
1071            lines.push(format!("edge_valid_to: {}", edge.properties.valid_to));
1072        }
1073    }
1074    lines
1075}
1076
/// Shortens `value` to at most `max_len` characters (Unicode scalar values,
/// not bytes).
///
/// Text that already fits is returned unchanged. Longer text is cut and
/// terminated with `...`, the ellipsis counting towards `max_len`. When
/// `max_len` is too small to hold any text plus the ellipsis, the ellipsis
/// itself is shortened so the result never exceeds the requested length.
fn truncate(value: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    // A character at index `max_len` exists only when the text is too long;
    // this avoids counting every character of a long input.
    if value.char_indices().nth(max_len).is_none() {
        return value.to_owned();
    }
    if max_len <= ELLIPSIS.len() {
        // ELLIPSIS is ASCII, so slicing by byte index is safe.
        return ELLIPSIS[..max_len].to_owned();
    }

    let keep = max_len - ELLIPSIS.len();
    let cut = value
        .char_indices()
        .nth(keep)
        .map_or(value.len(), |(byte_index, _)| byte_index);
    let mut shortened = String::with_capacity(cut + ELLIPSIS.len());
    shortened.push_str(&value[..cut]);
    shortened.push_str(ELLIPSIS);
    shortened
}
1085
/// Escapes backslashes, newlines, carriage returns and tabs so that `value`
/// occupies a single output line.
///
/// Each of those characters is replaced by its two-character backslash
/// sequence (`\\`, `\n`, `\r`, `\t`); every other character is copied as is.
fn escape_cli_text(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            other => {
                escaped.push(other);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
1099
1100fn push_description_line(lines: &mut Vec<String>, description: &str, max_len: Option<usize>) {
1101    if description.is_empty() {
1102        return;
1103    }
1104    let escaped = escape_cli_text(description);
1105    let rendered = match max_len {
1106        Some(limit) => truncate(&escaped, limit),
1107        None => escaped,
1108    };
1109    lines.push(format!("desc: {rendered}"));
1110}
1111
/// Appends the feedback fields that carry information to `lines`.
///
/// A zero `score`, a zero `count` and a missing `last_ts_ms` are each omitted.
/// Every emitted key is prefixed with `prefix` when one is supplied.
fn push_feedback_lines(
    lines: &mut Vec<String>,
    score: f64,
    count: u64,
    last_ts_ms: Option<u64>,
    prefix: Option<&str>,
) {
    let prefix = match prefix {
        Some(text) => text,
        None => "",
    };
    // Each entry is `None` when its field should be left out.
    let entries = vec![
        (score != 0.0).then(|| format!("{prefix}feedback_score: {score}")),
        (count != 0).then(|| format!("{prefix}feedback_count: {count}")),
        last_ts_ms.map(|ts| format!("{prefix}feedback_last_ts_ms: {ts}")),
    ];
    lines.extend(entries.into_iter().flatten());
}
1130
1131fn render_attached_note_lines(note: &crate::graph::Note) -> Vec<String> {
1132    let mut lines = vec![format!("! {}", note.id)];
1133    if !note.body.is_empty() {
1134        lines.push(format!("note_body: {}", escape_cli_text(&note.body)));
1135    }
1136    if !note.tags.is_empty() {
1137        lines.push(format!(
1138            "note_tags: {}",
1139            note.tags
1140                .iter()
1141                .map(|tag| escape_cli_text(tag))
1142                .collect::<Vec<_>>()
1143                .join(", ")
1144        ));
1145    }
1146    if !note.author.is_empty() {
1147        lines.push(format!("note_author: {}", escape_cli_text(&note.author)));
1148    }
1149    if !note.created_at.is_empty() {
1150        lines.push(format!("note_created_at: {}", note.created_at));
1151    }
1152    if !note.provenance.is_empty() {
1153        lines.push(format!(
1154            "note_provenance: {}",
1155            escape_cli_text(&note.provenance)
1156        ));
1157    }
1158    if !note.source_files.is_empty() {
1159        lines.push(format!(
1160            "note_sources: {}",
1161            note.source_files
1162                .iter()
1163                .map(|source| escape_cli_text(source))
1164                .collect::<Vec<_>>()
1165                .join(", ")
1166        ));
1167    }
1168    lines
1169}
1170
1171fn find_matches_with_index<'a>(
1172    graph: &'a GraphFile,
1173    query: &str,
1174    limit: usize,
1175    include_features: bool,
1176    mode: FindMode,
1177    index: Option<&Bm25Index>,
1178) -> Vec<ScoredNode<'a>> {
1179    let mut matches = find_all_matches_with_index(graph, query, include_features, mode, index);
1180    matches.truncate(limit);
1181    matches
1182}
1183
1184fn find_all_matches_with_index<'a>(
1185    graph: &'a GraphFile,
1186    query: &str,
1187    include_features: bool,
1188    mode: FindMode,
1189    index: Option<&Bm25Index>,
1190) -> Vec<ScoredNode<'a>> {
1191    let context = FindQueryContext::build(graph);
1192    let mut scored: Vec<ScoredNode<'a>> = match mode {
1193        FindMode::Fuzzy => {
1194            let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
1195            let mut matcher = Matcher::new(Config::DEFAULT);
1196            let candidates = graph
1197                .nodes
1198                .iter()
1199                .filter(|node| include_features || node.r#type != "Feature")
1200                .filter_map(|node| {
1201                    score_node(&context, node, query, &pattern, &mut matcher).map(|score| {
1202                        RawCandidate {
1203                            node,
1204                            raw_relevance: score as f64,
1205                            lexical_boost: 0,
1206                        }
1207                    })
1208                })
1209                .collect();
1210            compose_scores(candidates)
1211        }
1212        FindMode::Bm25 => compose_scores(score_bm25_raw(
1213            graph,
1214            &context,
1215            query,
1216            include_features,
1217            index,
1218        )),
1219    };
1220
1221    scored.sort_by(|left, right| {
1222        right
1223            .score
1224            .cmp(&left.score)
1225            .then_with(|| left.node.id.cmp(&right.node.id))
1226    });
1227    scored
1228}
1229
1230fn compose_scores<'a>(candidates: Vec<RawCandidate<'a>>) -> Vec<ScoredNode<'a>> {
1231    let max_raw = candidates
1232        .iter()
1233        .map(|candidate| candidate.raw_relevance)
1234        .fold(0.0f64, f64::max);
1235    let max_raw_log = max_raw.ln_1p();
1236
1237    candidates
1238        .into_iter()
1239        .filter_map(|candidate| {
1240            if candidate.raw_relevance <= 0.0 {
1241                return None;
1242            }
1243            let normalized_relevance = if max_raw_log > 0.0 {
1244                ((candidate.raw_relevance.ln_1p() / max_raw_log) * 1000.0).round() as i64
1245            } else {
1246                0
1247            };
1248            let feedback = feedback_boost(candidate.node);
1249            let importance = importance_boost(candidate.node);
1250            let authority_raw = feedback + importance;
1251            let relative_cap =
1252                ((normalized_relevance as f64) * SCORE_META_MAX_RATIO).round() as i64;
1253            let authority_cap = relative_cap.max(SCORE_META_MIN_CAP).min(SCORE_META_MAX_CAP);
1254            let authority_applied = authority_raw.clamp(-authority_cap, authority_cap);
1255            let final_score = normalized_relevance + authority_applied;
1256
1257            Some(ScoredNode {
1258                score: final_score,
1259                node: candidate.node,
1260                breakdown: ScoreBreakdown {
1261                    raw_relevance: candidate.raw_relevance,
1262                    normalized_relevance,
1263                    lexical_boost: candidate.lexical_boost,
1264                    feedback_boost: feedback,
1265                    importance_boost: importance,
1266                    authority_raw,
1267                    authority_applied,
1268                    authority_cap,
1269                },
1270            })
1271        })
1272        .collect()
1273}
1274
1275fn feedback_boost(node: &Node) -> i64 {
1276    let count = node.properties.feedback_count as f64;
1277    if count <= 0.0 {
1278        return 0;
1279    }
1280    let avg = node.properties.feedback_score / count;
1281    let confidence = (count.ln_1p() / 3.0).min(1.0);
1282    let scaled = avg * 200.0 * confidence;
1283    scaled.clamp(-300.0, 300.0).round() as i64
1284}
1285
1286fn importance_boost(node: &Node) -> i64 {
1287    (i64::from(node.properties.importance) - IMPORTANCE_NEUTRAL) * IMPORTANCE_STEP_BOOST
1288}
1289
1290fn score_bm25_raw<'a>(
1291    graph: &'a GraphFile,
1292    context: &FindQueryContext<'a>,
1293    query: &str,
1294    include_features: bool,
1295    index: Option<&Bm25Index>,
1296) -> Vec<RawCandidate<'a>> {
1297    let terms = tokenize(query);
1298    if terms.is_empty() {
1299        return Vec::new();
1300    }
1301
1302    if let Some(idx) = index {
1303        let results = idx.search(&terms, graph);
1304        return results
1305            .into_iter()
1306            .filter_map(|(node_id, score)| {
1307                let node = graph.node_by_id(&node_id)?;
1308                if !include_features && node.r#type == "Feature" {
1309                    return None;
1310                }
1311                let document_terms = node_document_terms(context, node);
1312                let lexical_boost = bm25_lexical_boost(&terms, &document_terms);
1313                Some(RawCandidate {
1314                    node,
1315                    raw_relevance: score as f64 * 100.0 + lexical_boost as f64,
1316                    lexical_boost,
1317                })
1318            })
1319            .collect();
1320    }
1321
1322    let mut docs: Vec<(&'a Node, Vec<String>)> = graph
1323        .nodes
1324        .iter()
1325        .filter(|node| include_features || node.r#type != "Feature")
1326        .map(|node| (node, node_document_terms(context, node)))
1327        .collect();
1328
1329    if docs.is_empty() {
1330        return Vec::new();
1331    }
1332
1333    let mut df: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
1334    for term in &terms {
1335        let mut count = 0usize;
1336        for (_, tokens) in &docs {
1337            if tokens.iter().any(|t| t == term) {
1338                count += 1;
1339            }
1340        }
1341        df.insert(term.as_str(), count);
1342    }
1343
1344    let total_docs = docs.len() as f64;
1345    let avgdl = docs
1346        .iter()
1347        .map(|(_, tokens)| tokens.len() as f64)
1348        .sum::<f64>()
1349        / total_docs;
1350
1351    let mut scored = Vec::new();
1352
1353    for (node, tokens) in docs.drain(..) {
1354        let dl = tokens.len() as f64;
1355        if dl == 0.0 {
1356            continue;
1357        }
1358        let mut score = 0.0f64;
1359        for term in &terms {
1360            let tf = tokens.iter().filter(|t| *t == term).count() as f64;
1361            if tf == 0.0 {
1362                continue;
1363            }
1364            let df_t = *df.get(term.as_str()).unwrap_or(&0) as f64;
1365            let idf = (1.0 + (total_docs - df_t + 0.5) / (df_t + 0.5)).ln();
1366            let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl));
1367            score += idf * (tf * (BM25_K1 + 1.0) / denom);
1368        }
1369        if score > 0.0 {
1370            let lexical_boost = bm25_lexical_boost(&terms, &tokens);
1371            scored.push(RawCandidate {
1372                node,
1373                raw_relevance: score * 100.0 + lexical_boost as f64,
1374                lexical_boost,
1375            });
1376        }
1377    }
1378
1379    scored
1380}
1381
1382fn node_document_terms(context: &FindQueryContext<'_>, node: &Node) -> Vec<String> {
1383    let mut tokens = Vec::new();
1384    push_terms(&mut tokens, &node.id, BM25_ID_WEIGHT);
1385    push_terms(&mut tokens, &node.name, BM25_NAME_WEIGHT);
1386    push_terms(
1387        &mut tokens,
1388        &node.properties.description,
1389        BM25_DESCRIPTION_WEIGHT,
1390    );
1391    for alias in &node.properties.alias {
1392        push_terms(&mut tokens, alias, BM25_ALIAS_WEIGHT);
1393    }
1394    for fact in &node.properties.key_facts {
1395        push_terms(&mut tokens, fact, BM25_FACT_WEIGHT);
1396    }
1397    for note in context.notes_for(&node.id) {
1398        push_terms(&mut tokens, &note.body, BM25_NOTE_BODY_WEIGHT);
1399        for tag in &note.tags {
1400            push_terms(&mut tokens, tag, BM25_NOTE_TAG_WEIGHT);
1401        }
1402    }
1403    for neighbor in context.neighbors_for(&node.id) {
1404        push_terms(&mut tokens, &neighbor.id, BM25_NEIGHBOR_WEIGHT);
1405        push_terms(&mut tokens, &neighbor.name, BM25_NEIGHBOR_WEIGHT);
1406        push_terms(
1407            &mut tokens,
1408            &neighbor.properties.description,
1409            BM25_NEIGHBOR_WEIGHT,
1410        );
1411        for alias in &neighbor.properties.alias {
1412            push_terms(&mut tokens, alias, BM25_NEIGHBOR_WEIGHT);
1413        }
1414    }
1415    tokens
1416}
1417
1418fn push_terms(target: &mut Vec<String>, value: &str, weight: usize) {
1419    if value.is_empty() {
1420        return;
1421    }
1422    let terms = tokenize(value);
1423    for _ in 0..weight {
1424        target.extend(terms.iter().cloned());
1425    }
1426}
1427
/// Splits `text` into lowercase tokens.
///
/// A token is a maximal run of alphanumeric characters; every other character
/// acts as a separator. Lowercasing is applied character by character.
fn tokenize(text: &str) -> Vec<String> {
    text.split(|ch: char| !ch.is_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(|word| word.chars().flat_map(char::to_lowercase).collect::<String>())
        .collect()
}
1445
1446fn bm25_lexical_boost(query_terms: &[String], document_terms: &[String]) -> i64 {
1447    if query_terms.is_empty() || document_terms.is_empty() {
1448        return 0;
1449    }
1450    if query_terms.len() > 1 && contains_token_phrase(document_terms, query_terms) {
1451        return BM25_PHRASE_MATCH_BOOST;
1452    }
1453    let document_vocab: HashSet<&str> = document_terms.iter().map(String::as_str).collect();
1454    let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
1455    let matched_tokens = query_vocab
1456        .iter()
1457        .filter(|token| document_vocab.contains(**token))
1458        .count() as i64;
1459    if matched_tokens == 0 {
1460        return 0;
1461    }
1462    let query_token_count = query_vocab.len() as i64;
1463    (matched_tokens * BM25_TOKEN_MATCH_BOOST + query_token_count - 1) / query_token_count
1464}
1465
/// Reports whether `query_terms` occurs as a contiguous run inside
/// `document_terms`. An empty query never matches.
fn contains_token_phrase(document_terms: &[String], query_terms: &[String]) -> bool {
    let phrase_len = query_terms.len();
    if phrase_len == 0 || phrase_len > document_terms.len() {
        return false;
    }
    let last_start = document_terms.len() - phrase_len;
    (0..=last_start).any(|start| document_terms[start..start + phrase_len] == *query_terms)
}
1474
1475fn score_node(
1476    context: &FindQueryContext<'_>,
1477    node: &Node,
1478    query: &str,
1479    pattern: &Pattern,
1480    matcher: &mut Matcher,
1481) -> Option<u32> {
1482    let mut primary_score = 0;
1483    let mut primary_hits = 0;
1484
1485    let id_score = score_primary_field(query, pattern, matcher, &node.id, 4);
1486    if id_score > 0 {
1487        primary_hits += 1;
1488    }
1489    primary_score += id_score;
1490
1491    let name_score = score_primary_field(query, pattern, matcher, &node.name, 3);
1492    if name_score > 0 {
1493        primary_hits += 1;
1494    }
1495    primary_score += name_score;
1496
1497    for alias in &node.properties.alias {
1498        let alias_score = score_primary_field(query, pattern, matcher, alias, 3);
1499        if alias_score > 0 {
1500            primary_hits += 1;
1501        }
1502        primary_score += alias_score;
1503    }
1504
1505    let mut contextual_score = score_secondary_field(
1506        query,
1507        pattern,
1508        matcher,
1509        &node.properties.description,
1510        FUZZY_DESCRIPTION_WEIGHT,
1511    );
1512    for fact in &node.properties.key_facts {
1513        contextual_score += score_secondary_field(query, pattern, matcher, fact, FUZZY_FACT_WEIGHT);
1514    }
1515    contextual_score += score_notes_context(context, node, query, pattern, matcher);
1516
1517    let neighbor_context = score_neighbor_context(context, node, query, pattern, matcher)
1518        .min(FUZZY_NEIGHBOR_CONTEXT_CAP);
1519    contextual_score += if primary_hits > 0 {
1520        neighbor_context / 2
1521    } else {
1522        neighbor_context
1523    };
1524
1525    if primary_hits == 0 {
1526        contextual_score /= FUZZY_NO_PRIMARY_CONTEXT_DIVISOR;
1527    }
1528
1529    let total = primary_score + contextual_score;
1530    (total > 0).then_some(total)
1531}
1532
1533fn score_notes_context(
1534    context: &FindQueryContext<'_>,
1535    node: &Node,
1536    query: &str,
1537    pattern: &Pattern,
1538    matcher: &mut Matcher,
1539) -> u32 {
1540    let mut total = 0;
1541    for note in context.notes_for(&node.id) {
1542        total += score_secondary_field(query, pattern, matcher, &note.body, FUZZY_NOTE_BODY_WEIGHT);
1543        for tag in &note.tags {
1544            total += score_secondary_field(query, pattern, matcher, tag, FUZZY_NOTE_TAG_WEIGHT);
1545        }
1546    }
1547    total
1548}
1549
1550fn score_neighbor_context(
1551    context: &FindQueryContext<'_>,
1552    node: &Node,
1553    query: &str,
1554    pattern: &Pattern,
1555    matcher: &mut Matcher,
1556) -> u32 {
1557    let mut best = 0;
1558
1559    for neighbor in context.neighbors_for(&node.id) {
1560        let mut score = score_secondary_field(query, pattern, matcher, &neighbor.id, 1)
1561            + score_secondary_field(query, pattern, matcher, &neighbor.name, 1)
1562            + score_secondary_field(query, pattern, matcher, &neighbor.properties.description, 1);
1563
1564        for alias in &neighbor.properties.alias {
1565            score += score_secondary_field(query, pattern, matcher, alias, 1);
1566        }
1567
1568        best = best.max(score);
1569    }
1570
1571    best
1572}
1573
1574fn score_field(pattern: &Pattern, matcher: &mut Matcher, value: &str) -> Option<u32> {
1575    if value.is_empty() {
1576        return None;
1577    }
1578    let mut buf = Vec::new();
1579    let haystack = Utf32Str::new(value, &mut buf);
1580    pattern.score(haystack, matcher)
1581}
1582
1583fn score_primary_field(
1584    query: &str,
1585    pattern: &Pattern,
1586    matcher: &mut Matcher,
1587    value: &str,
1588    weight: u32,
1589) -> u32 {
1590    let bonus = textual_bonus(query, value);
1591    let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
1592    if bonus == 0 && fuzzy == 0 {
1593        return 0;
1594    }
1595    (fuzzy + bonus) * weight
1596}
1597
1598fn score_secondary_field(
1599    query: &str,
1600    pattern: &Pattern,
1601    matcher: &mut Matcher,
1602    value: &str,
1603    weight: u32,
1604) -> u32 {
1605    let bonus = textual_bonus(query, value);
1606    let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
1607    if bonus == 0 && fuzzy == 0 {
1608        return 0;
1609    }
1610    (fuzzy + bonus / 2) * weight
1611}
1612
1613fn textual_bonus(query: &str, value: &str) -> u32 {
1614    let query = query.trim().to_lowercase();
1615    let value = value.to_lowercase();
1616
1617    if value == query {
1618        return 400;
1619    }
1620    if value.contains(&query) {
1621        return 200;
1622    }
1623
1624    query
1625        .split_whitespace()
1626        .map(|token| {
1627            if value.contains(token) {
1628                80
1629            } else if is_subsequence(token, &value) {
1630                40
1631            } else {
1632                0
1633            }
1634        })
1635        .sum()
1636}
1637
/// Reports whether the characters of `needle` appear in `haystack` in the same
/// order, not necessarily adjacent. An empty needle is never a subsequence.
fn is_subsequence(needle: &str, haystack: &str) -> bool {
    if needle.is_empty() {
        return false;
    }
    // The haystack iterator is shared across needle characters, so each
    // search resumes right after the previous match.
    let mut remaining = haystack.chars();
    needle
        .chars()
        .all(|wanted| remaining.any(|seen| seen == wanted))
}
1660
// Unit tests for the scoring helpers in this file.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Concept` node with the given text fields, importance and
    /// feedback values; all other properties keep their defaults and the node
    /// has no source files.
    fn make_node(
        id: &str,
        name: &str,
        description: &str,
        key_facts: &[&str],
        alias: &[&str],
        importance: u8,
        feedback_score: f64,
        feedback_count: u64,
    ) -> Node {
        let mut properties = crate::graph::NodeProperties::default();
        properties.description = description.to_owned();
        properties.key_facts = key_facts.iter().map(|v| (*v).to_owned()).collect();
        properties.alias = alias.iter().map(|v| (*v).to_owned()).collect();
        properties.importance = importance;
        properties.feedback_score = feedback_score;
        properties.feedback_count = feedback_count;
        Node {
            id: id.to_owned(),
            r#type: "Concept".to_owned(),
            name: name.to_owned(),
            properties,
            source_files: Vec::new(),
        }
    }

    /// Returns the final score of the result whose node id is `id`; panics
    /// when no such result exists.
    fn score_for(results: &[ScoredNode<'_>], id: &str) -> i64 {
        results
            .iter()
            .find(|item| item.node.id == id)
            .map(|item| item.score)
            .expect("score for node")
    }

    /// Pins the bonus tiers: exact match, substring, per-token containment,
    /// subsequence-only and no match.
    #[test]
    fn textual_bonus_tiers_are_stable() {
        assert_eq!(textual_bonus("abc", "abc"), 400);
        assert_eq!(textual_bonus("abc", "xxabcxx"), 200);
        assert_eq!(textual_bonus("abc def", "aa abc and def zz"), 160);
        assert_eq!(textual_bonus("abc", "aXbYc"), 40);
        assert_eq!(textual_bonus("abc", "zzz"), 0);
    }

    /// Non-ASCII uppercase letters are lowercased and `-` separates tokens.
    #[test]
    fn tokenize_handles_unicode_casefolding() {
        let tokens = tokenize("ŁÓDŹ smart-home");
        assert_eq!(tokens, vec!["łódź", "smart", "home"]);
    }

    /// A contiguous phrase earns the phrase boost, scattered tokens earn the
    /// token boost, and unrelated text earns nothing.
    #[test]
    fn bm25_lexical_boost_prefers_phrase_then_tokens() {
        let query_terms = tokenize("smart home api");
        assert_eq!(
            bm25_lexical_boost(&query_terms, &tokenize("x smart home api y")),
            120
        );
        assert_eq!(
            bm25_lexical_boost(&query_terms, &tokenize("smart x api y home")),
            45
        );
        assert_eq!(
            bm25_lexical_boost(&query_terms, &tokenize("nothing here")),
            0
        );
    }

    /// A node whose id and name do not match the query still scores through
    /// its key facts and attached note; the same node without facts or notes
    /// does not match at all.
    #[test]
    fn score_node_uses_key_facts_and_notes_without_primary_match() {
        let node = make_node(
            "concept:gateway",
            "Gateway",
            "",
            &["Autentykacja OAuth2 przez konto producenta"],
            &[],
            4,
            0.0,
            0,
        );
        let mut graph = GraphFile::new("test");
        graph.nodes.push(node.clone());
        graph.notes.push(crate::graph::Note {
            id: "note:oauth".to_owned(),
            node_id: node.id.clone(),
            body: "Token refresh przez OAuth2".to_owned(),
            tags: vec!["oauth2".to_owned()],
            ..Default::default()
        });

        let pattern = Pattern::parse(
            "oauth2 producenta",
            CaseMatching::Ignore,
            Normalization::Smart,
        );
        let context = FindQueryContext::build(&graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let score = score_node(&context, &node, "oauth2 producenta", &pattern, &mut matcher);
        assert!(score.is_some_and(|value| value > 0));

        // Control case: same id and name, but no facts and an empty graph.
        let empty_graph = GraphFile::new("empty");
        let empty_node = make_node("concept:gateway", "Gateway", "", &[], &[], 4, 0.0, 0);
        let empty_context = FindQueryContext::build(&empty_graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let empty_score = score_node(
            &empty_context,
            &empty_node,
            "oauth2 producenta",
            &pattern,
            &mut matcher,
        );
        assert!(empty_score.is_none());
    }

    /// Two nodes with the same description differ only in importance; the
    /// more important one must end up with the higher final score.
    #[test]
    fn score_bm25_respects_importance_boost_for_equal_documents() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:high",
            "High",
            "smart home api",
            &[],
            &[],
            6,
            0.0,
            0,
        ));
        graph.nodes.push(make_node(
            "concept:low",
            "Low",
            "smart home api",
            &[],
            &[],
            1,
            0.0,
            0,
        ));

        let results =
            find_all_matches_with_index(&graph, "smart home api", true, FindMode::Bm25, None);
        let high_score = score_for(&results, "concept:high");
        let low_score = score_for(&results, "concept:low");
        assert!(high_score > low_score);
    }

    /// A weakly relevant node with very strong feedback has its authority
    /// boost limited to the cap instead of receiving the raw value.
    #[test]
    fn final_score_caps_authority_boost_for_weak_relevance() {
        let weak = make_node(
            "concept:weak",
            "Weak",
            "smart home api",
            &[],
            &[],
            6,
            300.0,
            1,
        );
        let strong = make_node(
            "concept:strong",
            "Strong",
            "smart home api smart home api smart home api smart home api",
            &[],
            &[],
            4,
            0.0,
            0,
        );
        // Raw relevance is supplied directly, so only `compose_scores` is exercised.
        let candidates = vec![
            RawCandidate {
                node: &weak,
                raw_relevance: 12.0,
                lexical_boost: 0,
            },
            RawCandidate {
                node: &strong,
                raw_relevance: 100.0,
                lexical_boost: 0,
            },
        ];
        let scored = compose_scores(candidates);
        let weak_scored = scored
            .iter()
            .find(|item| item.node.id == "concept:weak")
            .expect("weak node");
        assert_eq!(
            weak_scored.breakdown.authority_applied,
            weak_scored.breakdown.authority_cap
        );
        assert!(weak_scored.breakdown.authority_raw > weak_scored.breakdown.authority_cap);
    }

    /// Pins concrete boost values for importance above and below neutral and
    /// for positive, negative and saturated feedback.
    #[test]
    fn importance_and_feedback_boost_have_expected_ranges() {
        let high_importance = make_node("concept:high", "High", "", &[], &[], 6, 0.0, 0);
        let low_importance = make_node("concept:low", "Low", "", &[], &[], 1, 0.0, 0);
        assert_eq!(importance_boost(&high_importance), 44);
        assert_eq!(importance_boost(&low_importance), -66);

        let positive = make_node("concept:pos", "Pos", "", &[], &[], 4, 1.0, 1);
        let negative = make_node("concept:neg", "Neg", "", &[], &[], 4, -2.0, 1);
        let saturated = make_node("concept:sat", "Sat", "", &[], &[], 4, 300.0, 1);
        assert_eq!(feedback_boost(&positive), 46);
        assert_eq!(feedback_boost(&negative), -92);
        assert_eq!(feedback_boost(&saturated), 300);
    }
}
kg/output.rs

kg/
output.rs