kg/
output.rs

1use std::collections::{HashMap, HashSet, VecDeque};
2
3use nucleo_matcher::pattern::{CaseMatching, Normalization, Pattern};
4use nucleo_matcher::{Config, Matcher, Utf32Str};
5
6use crate::graph::{Edge, GraphFile, Node, Note};
7use crate::index::Bm25Index;
8use crate::text_norm;
9
// BM25 tuning parameters. Presumably the conventional k1 / b terms; the scoring
// code that consumes them is not visible in this part of the file — TODO confirm.
const BM25_K1: f64 = 1.5;
const BM25_B: f64 = 0.75;
// Character budget for adaptive rendering: the default used when the caller gives
// no target, and the bounds `clamp_target_chars` clamps any requested value into.
const DEFAULT_TARGET_CHARS: usize = 4200;
const MIN_TARGET_CHARS: usize = 300;
const MAX_TARGET_CHARS: usize = 12_000;
// Fuzzy-scoring knobs (context caps/divisors and per-field weights, judging by the
// names). NOTE(review): their consumers are outside this view.
const FUZZY_NEIGHBOR_CONTEXT_CAP: u32 = 220;
const FUZZY_NO_PRIMARY_CONTEXT_DIVISOR: u32 = 3;
const FUZZY_NEIGHBOR_CONTEXT_DIVISOR: u32 = 3;
const FUZZY_DESCRIPTION_WEIGHT: u32 = 2;
const FUZZY_FACT_WEIGHT: u32 = 2;
const FUZZY_NOTE_BODY_WEIGHT: u32 = 1;
const FUZZY_NOTE_TAG_WEIGHT: u32 = 2;
// BM25 lexical boosts and per-field weights (again inferred from names only —
// verify against the scoring code).
const BM25_PHRASE_MATCH_BOOST: i64 = 120;
const BM25_PROXIMITY_MATCH_BOOST: i64 = 80;
const BM25_TOKEN_MATCH_BOOST: i64 = 45;
const BM25_ID_WEIGHT: usize = 5;
const BM25_NAME_WEIGHT: usize = 4;
const BM25_ALIAS_WEIGHT: usize = 4;
const BM25_DESCRIPTION_WEIGHT: usize = 2;
const BM25_FACT_WEIGHT: usize = 2;
const BM25_NOTE_BODY_WEIGHT: usize = 1;
const BM25_NOTE_TAG_WEIGHT: usize = 1;
const BM25_NEIGHBOR_WEIGHT: usize = 1;
const BM25_SELF_CONTEXT_WEIGHT: f64 = 3.0;
const BM25_NEIGHBOR_CONTEXT_WEIGHT: f64 = 1.0;
const BM25_PROXIMITY_WINDOW_TOKENS: usize = 6;
// Score-shaping constants for fact volume, importance and metadata caps.
// NOTE(review): semantics are not derivable from the visible code.
const FACT_VOLUME_BASE_CHARS: f64 = 500.0;
const FACT_VOLUME_MIN_FACTOR: f64 = 0.35;
const IMPORTANCE_NEUTRAL: f64 = 0.5;
const IMPORTANCE_MAX_ABS_BOOST: f64 = 66.0;
const SCORE_META_MAX_RATIO: f64 = 0.35;
const SCORE_META_MIN_CAP: i64 = 30;
const SCORE_META_MAX_CAP: i64 = 240;
43
/// Search strategy selector that the `find_*` and `render_find*` entry points
/// forward to the matching code.
///
/// NOTE(review): the code that interprets each variant is not visible in this
/// part of the file; the per-variant notes below are inferred from the names.
#[derive(Debug, Clone, Copy)]
pub enum FindMode {
    /// Presumably fuzzy matching only — confirm against the scorer.
    Fuzzy,
    /// Presumably BM25 ranking only — confirm against the scorer.
    Bm25,
    /// Presumably a blend of several signals — confirm against the scorer.
    Hybrid,
}
50
/// Weights for the three ranking signals, each kept within `[0.0, 1.0]` when
/// produced by [`FindTune::parse`].
#[derive(Debug, Clone, Copy)]
pub struct FindTune {
    pub bm25: f64,
    pub fuzzy: f64,
    pub vector: f64,
}

impl FindTune {
    /// Parses a comma-separated list of `key=value` overrides on top of the
    /// default weights.
    ///
    /// Recognized keys are `bm25`, `fuzzy` and `vector`; keys and values are
    /// trimmed. Keys that are not mentioned keep their default value, and every
    /// weight is clamped into `[0.0, 1.0]` before being returned.
    ///
    /// Returns `None` when any part lacks an `=`, names an unknown key, carries a
    /// value that does not parse as `f64`, or carries a NaN value.
    pub fn parse(raw: &str) -> Option<Self> {
        let mut tune = Self::default();
        for part in raw.split(',') {
            let (key, value) = part.split_once('=')?;
            let value = value.trim().parse::<f64>().ok()?;
            // `f64::from_str` accepts "nan", and `f64::clamp` returns NaN for a NaN
            // input, so without this guard a NaN weight would escape the clamp.
            if value.is_nan() {
                return None;
            }
            match key.trim() {
                "bm25" => tune.bm25 = value,
                "fuzzy" => tune.fuzzy = value,
                "vector" => tune.vector = value,
                _ => return None,
            }
        }
        Some(tune.clamped())
    }

    /// Returns a copy with every weight clamped into `[0.0, 1.0]`.
    fn clamped(self) -> Self {
        Self {
            bm25: self.bm25.clamp(0.0, 1.0),
            fuzzy: self.fuzzy.clamp(0.0, 1.0),
            vector: self.vector.clamp(0.0, 1.0),
        }
    }
}

impl Default for FindTune {
    /// Default blend: BM25 0.55, fuzzy 0.35, vector 0.10.
    fn default() -> Self {
        Self {
            bm25: 0.55,
            fuzzy: 0.35,
            vector: 0.10,
        }
    }
}
92
/// A borrowed node paired with its final score and the components behind it.
#[derive(Clone, Copy)]
struct ScoredNode<'a> {
    /// Final score; printed as `score: …` by the renderers in this file.
    score: i64,
    /// The matched node, borrowed rather than copied.
    node: &'a Node,
    /// Components reported by `render_score_debug_line`.
    breakdown: ScoreBreakdown,
}
99
/// Individual score components attached to a `ScoredNode`.
///
/// NOTE(review): the code that fills these fields is not visible in this part of
/// the file, so their exact meaning should be confirmed against the scorer.
#[derive(Debug, Clone, Copy)]
struct ScoreBreakdown {
    // Printed with three decimals in the `score_debug:` line.
    raw_relevance: f64,
    normalized_relevance: i64,
    lexical_boost: i64,
    feedback_boost: i64,
    importance_boost: i64,
    authority_raw: i64,
    authority_applied: i64,
    authority_cap: i64,
}
111
/// A node with a raw relevance value and a lexical boost.
///
/// NOTE(review): no use of this type is visible in this part of the file;
/// presumably an intermediate result before normalization — confirm.
struct RawCandidate<'a> {
    node: &'a Node,
    raw_relevance: f64,
    lexical_boost: i64,
}
117
/// Lookup tables built once from a graph so notes and neighbors of a node can be
/// fetched by node id.
struct FindQueryContext<'a> {
    /// Notes grouped by the `node_id` they reference.
    notes_by_node: HashMap<&'a str, Vec<&'a Note>>,
    /// Nodes connected by an edge in either direction, sorted by id and deduplicated.
    neighbors_by_node: HashMap<&'a str, Vec<&'a Node>>,
}
122
123impl<'a> FindQueryContext<'a> {
124    fn build(graph: &'a GraphFile) -> Self {
125        let node_by_id: HashMap<&'a str, &'a Node> = graph
126            .nodes
127            .iter()
128            .map(|node| (node.id.as_str(), node))
129            .collect();
130
131        let mut notes_by_node: HashMap<&'a str, Vec<&'a Note>> = HashMap::new();
132        for note in &graph.notes {
133            notes_by_node
134                .entry(note.node_id.as_str())
135                .or_default()
136                .push(note);
137        }
138
139        let mut neighbors_by_node: HashMap<&'a str, Vec<&'a Node>> = HashMap::new();
140        for edge in &graph.edges {
141            if let (Some(source), Some(target)) = (
142                node_by_id.get(edge.source_id.as_str()),
143                node_by_id.get(edge.target_id.as_str()),
144            ) {
145                neighbors_by_node
146                    .entry(source.id.as_str())
147                    .or_default()
148                    .push(*target);
149                neighbors_by_node
150                    .entry(target.id.as_str())
151                    .or_default()
152                    .push(*source);
153            }
154        }
155
156        for neighbors in neighbors_by_node.values_mut() {
157            neighbors.sort_by(|left, right| left.id.cmp(&right.id));
158            neighbors.dedup_by(|left, right| left.id == right.id);
159        }
160
161        Self {
162            notes_by_node,
163            neighbors_by_node,
164        }
165    }
166
167    fn notes_for(&self, node_id: &str) -> &[&'a Note] {
168        self.notes_by_node
169            .get(node_id)
170            .map(Vec::as_slice)
171            .unwrap_or(&[])
172    }
173
174    fn neighbors_for(&self, node_id: &str) -> &[&'a Node] {
175        self.neighbors_by_node
176            .get(node_id)
177            .map(Vec::as_slice)
178            .unwrap_or(&[])
179    }
180}
181
/// Public, owned copy of the score components of one result.
///
/// Filled field-for-field from the internal breakdown by
/// `find_scored_nodes_and_total_with_index_tuned`.
#[derive(Debug, Clone)]
pub struct ScoreBreakdownResult {
    pub raw_relevance: f64,
    pub normalized_relevance: i64,
    pub lexical_boost: i64,
    pub feedback_boost: i64,
    pub importance_boost: i64,
    pub authority_raw: i64,
    pub authority_applied: i64,
    pub authority_cap: i64,
}
193
/// Public, owned search result: a cloned node with its score and breakdown.
#[derive(Debug, Clone)]
pub struct ScoredNodeResult {
    /// Final score of the node for the query.
    pub score: i64,
    /// Owned copy of the matched node.
    pub node: Node,
    /// Components that accompany `score`.
    pub breakdown: ScoreBreakdownResult,
}
200
/// Renders find results for every query without score debugging and without a
/// prebuilt BM25 index.
///
/// Thin wrapper over [`render_find_with_index`]; all other arguments are
/// forwarded unchanged.
pub fn render_find(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    full: bool,
) -> String {
    render_find_with_index(
        graph,
        queries,
        limit,
        include_features,
        include_metadata,
        mode,
        full,
        // debug_score
        false,
        // index
        None,
    )
}
222
/// Renders find results for every query with no tuning override.
///
/// Thin wrapper over [`render_find_with_index_tuned`] that passes `None` for
/// `tune`; all other arguments are forwarded unchanged.
pub fn render_find_with_index(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    full: bool,
    debug_score: bool,
    index: Option<&Bm25Index>,
) -> String {
    render_find_with_index_tuned(
        graph,
        queries,
        limit,
        include_features,
        include_metadata,
        mode,
        full,
        debug_score,
        index,
        // tune
        None,
    )
}
247
248pub fn render_find_with_index_tuned(
249    graph: &GraphFile,
250    queries: &[String],
251    limit: usize,
252    include_features: bool,
253    include_metadata: bool,
254    mode: FindMode,
255    full: bool,
256    debug_score: bool,
257    index: Option<&Bm25Index>,
258    tune: Option<&FindTune>,
259) -> String {
260    let mut sections = Vec::new();
261    for query in queries {
262        let matches = find_all_matches_with_index(
263            graph,
264            query,
265            include_features,
266            include_metadata,
267            mode,
268            index,
269            tune,
270        );
271        let total = matches.len();
272        let visible: Vec<_> = matches.into_iter().take(limit).collect();
273        let shown = visible.len();
274        let mut lines = vec![render_result_header(query, shown, total)];
275        for scored in visible {
276            lines.push(render_scored_node_block(
277                graph,
278                &scored,
279                full,
280                debug_score,
281                Some(query.as_str()),
282            ));
283        }
284        push_limit_omission_line(&mut lines, shown, total);
285        sections.push(lines.join("\n"));
286    }
287    format!("{}\n", sections.join("\n\n"))
288}
289
/// Returns owned copies of the nodes matched for `query`.
///
/// Delegates to `find_matches_with_index` with no BM25 index and no tuning
/// override, then clones the node out of every returned match.
pub fn find_nodes(
    graph: &GraphFile,
    query: &str,
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
) -> Vec<Node> {
    find_matches_with_index(
        graph,
        query,
        limit,
        include_features,
        include_metadata,
        mode,
        // index
        None,
        // tune
        None,
    )
    .into_iter()
    .map(|item| item.node.clone())
    .collect()
}
312
/// Returns owned copies of the nodes matched for `query`, optionally using a
/// prebuilt BM25 index.
///
/// Delegates to `find_matches_with_index` with no tuning override, then clones
/// the node out of every returned match.
pub fn find_nodes_with_index(
    graph: &GraphFile,
    query: &str,
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    index: Option<&Bm25Index>,
) -> Vec<Node> {
    find_matches_with_index(
        graph,
        query,
        limit,
        include_features,
        include_metadata,
        mode,
        index,
        // tune
        None,
    )
    .into_iter()
    .map(|item| item.node.clone())
    .collect()
}
336
/// Returns owned copies of the nodes matched for `query`, with an optional BM25
/// index and an optional tuning override.
///
/// Forwards every argument to `find_matches_with_index` and clones the node out
/// of every returned match.
pub fn find_nodes_with_index_tuned(
    graph: &GraphFile,
    query: &str,
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    index: Option<&Bm25Index>,
    tune: Option<&FindTune>,
) -> Vec<Node> {
    find_matches_with_index(
        graph,
        query,
        limit,
        include_features,
        include_metadata,
        mode,
        index,
        tune,
    )
    .into_iter()
    .map(|item| item.node.clone())
    .collect()
}
361
362pub fn find_nodes_and_total_with_index(
363    graph: &GraphFile,
364    query: &str,
365    limit: usize,
366    include_features: bool,
367    include_metadata: bool,
368    mode: FindMode,
369    index: Option<&Bm25Index>,
370) -> (usize, Vec<Node>) {
371    let matches = find_all_matches_with_index(
372        graph,
373        query,
374        include_features,
375        include_metadata,
376        mode,
377        index,
378        None,
379    );
380    let total = matches.len();
381    let nodes = matches
382        .into_iter()
383        .take(limit)
384        .map(|item| item.node.clone())
385        .collect();
386    (total, nodes)
387}
388
/// Returns the total match count and the first `limit` scored results for
/// `query`, with no tuning override.
///
/// Thin wrapper over [`find_scored_nodes_and_total_with_index_tuned`] that passes
/// `None` for `tune`.
pub fn find_scored_nodes_and_total_with_index(
    graph: &GraphFile,
    query: &str,
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    index: Option<&Bm25Index>,
) -> (usize, Vec<ScoredNodeResult>) {
    find_scored_nodes_and_total_with_index_tuned(
        graph,
        query,
        limit,
        include_features,
        include_metadata,
        mode,
        index,
        // tune
        None,
    )
}
409
410pub fn find_scored_nodes_and_total_with_index_tuned(
411    graph: &GraphFile,
412    query: &str,
413    limit: usize,
414    include_features: bool,
415    include_metadata: bool,
416    mode: FindMode,
417    index: Option<&Bm25Index>,
418    tune: Option<&FindTune>,
419) -> (usize, Vec<ScoredNodeResult>) {
420    let matches = find_all_matches_with_index(
421        graph,
422        query,
423        include_features,
424        include_metadata,
425        mode,
426        index,
427        tune,
428    );
429    let total = matches.len();
430    let nodes = matches
431        .into_iter()
432        .take(limit)
433        .map(|item| ScoredNodeResult {
434            score: item.score,
435            node: item.node.clone(),
436            breakdown: ScoreBreakdownResult {
437                raw_relevance: item.breakdown.raw_relevance,
438                normalized_relevance: item.breakdown.normalized_relevance,
439                lexical_boost: item.breakdown.lexical_boost,
440                feedback_boost: item.breakdown.feedback_boost,
441                importance_boost: item.breakdown.importance_boost,
442                authority_raw: item.breakdown.authority_raw,
443                authority_applied: item.breakdown.authority_applied,
444                authority_cap: item.breakdown.authority_cap,
445            },
446        })
447        .collect();
448    (total, nodes)
449}
450
/// Counts the matches for all `queries` without a prebuilt BM25 index.
///
/// Thin wrapper over [`count_find_results_with_index`] that passes `None` for
/// `index`; `limit` is forwarded unchanged.
pub fn count_find_results(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
) -> usize {
    count_find_results_with_index(
        graph,
        queries,
        limit,
        include_features,
        include_metadata,
        mode,
        // index
        None,
    )
}
469
470pub fn count_find_results_with_index(
471    graph: &GraphFile,
472    queries: &[String],
473    _limit: usize,
474    include_features: bool,
475    include_metadata: bool,
476    mode: FindMode,
477    index: Option<&Bm25Index>,
478) -> usize {
479    let mut total = 0;
480    for query in queries {
481        total += find_all_matches_with_index(
482            graph,
483            query,
484            include_features,
485            include_metadata,
486            mode,
487            index,
488            None,
489        )
490        .len();
491    }
492    total
493}
494
/// Renders a single node via `render_node_block` and appends a trailing newline.
///
/// `full` is forwarded unchanged to `render_node_block`.
pub fn render_node(graph: &GraphFile, node: &Node, full: bool) -> String {
    format!("{}\n", render_node_block(graph, node, full))
}
498
499pub fn render_node_adaptive(graph: &GraphFile, node: &Node, target_chars: Option<usize>) -> String {
500    let target = clamp_target_chars(target_chars);
501    let full = format!("{}\n", render_node_block(graph, node, true));
502    if fits_target_chars(&full, target) {
503        return full;
504    }
505    let mut candidates = Vec::new();
506    for (depth, detail, edge_cap) in [
507        (0usize, DetailLevel::Rich, 8usize),
508        (1usize, DetailLevel::Rich, 8usize),
509        (2usize, DetailLevel::Rich, 6usize),
510        (2usize, DetailLevel::Compact, 6usize),
511        (2usize, DetailLevel::Minimal, 2usize),
512    ] {
513        let rendered = render_single_node_candidate(graph, node, depth, detail, edge_cap);
514        candidates.push(Candidate {
515            rendered,
516            depth,
517            detail,
518            shown_nodes: 1 + depth,
519        });
520    }
521    pick_best_candidate(candidates, target)
522}
523
/// Renders budget-aware find results for every query with no tuning override.
///
/// Thin wrapper over [`render_find_adaptive_with_index_tuned`] that passes `None`
/// for `tune`; all other arguments are forwarded unchanged.
pub fn render_find_adaptive_with_index(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    target_chars: Option<usize>,
    debug_score: bool,
    index: Option<&Bm25Index>,
) -> String {
    render_find_adaptive_with_index_tuned(
        graph,
        queries,
        limit,
        include_features,
        include_metadata,
        mode,
        target_chars,
        debug_score,
        index,
        // tune
        None,
    )
}
548
549pub fn render_find_adaptive_with_index_tuned(
550    graph: &GraphFile,
551    queries: &[String],
552    limit: usize,
553    include_features: bool,
554    include_metadata: bool,
555    mode: FindMode,
556    target_chars: Option<usize>,
557    debug_score: bool,
558    index: Option<&Bm25Index>,
559    tune: Option<&FindTune>,
560) -> String {
561    let target = clamp_target_chars(target_chars);
562    let mut sections = Vec::new();
563    for query in queries {
564        let matches = find_all_matches_with_index(
565            graph,
566            query,
567            include_features,
568            include_metadata,
569            mode,
570            index,
571            tune,
572        );
573        let total = matches.len();
574        let visible: Vec<_> = matches.into_iter().take(limit).collect();
575        let section = if visible.len() == 1 {
576            render_single_result_section(graph, query, &visible[0], total, target, debug_score)
577        } else {
578            render_multi_result_section(graph, query, &visible, total, target, debug_score)
579        };
580        sections.push(section);
581    }
582    format!("{}\n", sections.join("\n\n"))
583}
584
/// How much of a node is printed by `render_node_identity_lines`.
#[derive(Clone, Copy)]
enum DetailLevel {
    /// Heading, aliases, description and up to three key facts.
    Rich,
    /// Heading, description (rendered with a limit of 140) and the first key fact.
    Compact,
    /// Heading line only.
    Minimal,
}
591
/// One rendered variant competing in `pick_best_candidate`.
struct Candidate {
    /// The rendered text whose character count is measured against the target.
    rendered: String,
    /// Neighbor depth used for this variant; weighted most heavily in the utility.
    depth: usize,
    /// Detail level used for this variant; contributes `utility_bonus()`.
    detail: DetailLevel,
    /// Number of nodes credited to this variant in the utility calculation.
    shown_nodes: usize,
}
598
impl DetailLevel {
    /// Utility points that `pick_best_candidate` adds for this detail level;
    /// richer levels score higher.
    fn utility_bonus(self) -> usize {
        match self {
            DetailLevel::Rich => 20,
            DetailLevel::Compact => 10,
            DetailLevel::Minimal => 0,
        }
    }
}
608
609fn clamp_target_chars(target_chars: Option<usize>) -> usize {
610    target_chars
611        .unwrap_or(DEFAULT_TARGET_CHARS)
612        .clamp(MIN_TARGET_CHARS, MAX_TARGET_CHARS)
613}
614
615fn render_single_result_section(
616    graph: &GraphFile,
617    query: &str,
618    node: &ScoredNode<'_>,
619    total_available: usize,
620    target: usize,
621    debug_score: bool,
622) -> String {
623    let header = render_result_header(query, 1, total_available);
624    let full = render_single_result_candidate(
625        graph,
626        query,
627        &header,
628        node,
629        total_available,
630        0,
631        DetailLevel::Rich,
632        8,
633        true,
634        debug_score,
635    );
636    if fits_target_chars(&full, target) {
637        return full.trim_end().to_owned();
638    }
639    let mut candidates = Vec::new();
640    for (depth, detail, edge_cap) in [
641        (0usize, DetailLevel::Rich, 8usize),
642        (1usize, DetailLevel::Rich, 8usize),
643        (2usize, DetailLevel::Rich, 6usize),
644        (2usize, DetailLevel::Compact, 6usize),
645        (2usize, DetailLevel::Minimal, 2usize),
646    ] {
647        candidates.push(Candidate {
648            rendered: render_single_result_candidate(
649                graph,
650                query,
651                &header,
652                node,
653                total_available,
654                depth,
655                detail,
656                edge_cap,
657                false,
658                debug_score,
659            ),
660            depth,
661            detail,
662            shown_nodes: 1 + depth,
663        });
664    }
665    pick_best_candidate(candidates, target)
666        .trim_end()
667        .to_owned()
668}
669
670fn render_multi_result_section(
671    graph: &GraphFile,
672    query: &str,
673    nodes: &[ScoredNode<'_>],
674    total_available: usize,
675    target: usize,
676    debug_score: bool,
677) -> String {
678    let visible_total = nodes.len();
679    let full = render_full_result_section(graph, query, nodes, total_available, debug_score);
680    if fits_target_chars(&full, target) {
681        return full;
682    }
683    let mut candidates = Vec::new();
684    let full_cap = visible_total;
685    let mid_cap = full_cap.min(5);
686    let low_cap = full_cap.min(3);
687
688    for (detail, edge_cap, result_cap, depth) in [
689        (DetailLevel::Rich, 4usize, full_cap.min(4), 0usize),
690        (DetailLevel::Compact, 3usize, full_cap, 0usize),
691        (DetailLevel::Rich, 2usize, mid_cap, 1usize),
692        (DetailLevel::Compact, 1usize, full_cap, 0usize),
693        (DetailLevel::Minimal, 1usize, mid_cap, 0usize),
694        (DetailLevel::Minimal, 0usize, low_cap, 0usize),
695        (DetailLevel::Minimal, 0usize, low_cap.min(2), 1usize),
696    ] {
697        let shown = result_cap.min(nodes.len());
698        let mut lines = vec![render_result_header(query, shown, total_available)];
699        for node in nodes.iter().take(shown) {
700            lines.extend(render_scored_node_candidate_lines(
701                graph,
702                query,
703                node,
704                0,
705                detail,
706                edge_cap,
707                debug_score,
708            ));
709            if depth > 0 {
710                lines.extend(render_neighbor_layers(graph, node.node, depth, detail));
711            }
712        }
713        if visible_total > shown {
714            lines.push(format!("... +{} more nodes omitted", visible_total - shown));
715        }
716        push_limit_omission_line(&mut lines, visible_total, total_available);
717        candidates.push(Candidate {
718            rendered: format!("{}\n", lines.join("\n")),
719            depth,
720            detail,
721            shown_nodes: shown,
722        });
723    }
724
725    pick_best_candidate(candidates, target)
726        .trim_end()
727        .to_owned()
728}
729
730fn pick_best_candidate(candidates: Vec<Candidate>, target: usize) -> String {
731    let lower = (target as f64 * 0.7) as usize;
732    let mut best: Option<(usize, usize, usize, usize, String)> = None;
733
734    for candidate in candidates {
735        let chars = candidate.rendered.chars().count();
736        let overshoot = chars.saturating_sub(target);
737        let undershoot = lower.saturating_sub(chars);
738        let penalty = overshoot.saturating_mul(10).saturating_add(undershoot);
739        let utility = candidate
740            .depth
741            .saturating_mul(100)
742            .saturating_add(candidate.shown_nodes.saturating_mul(5))
743            .saturating_add(candidate.detail.utility_bonus());
744
745        let entry = (
746            penalty,
747            overshoot,
748            usize::MAX - utility,
749            usize::MAX - chars,
750            candidate.rendered,
751        );
752        if best.as_ref().is_none_or(|current| {
753            entry.0 < current.0
754                || (entry.0 == current.0 && entry.1 < current.1)
755                || (entry.0 == current.0 && entry.1 == current.1 && entry.2 < current.2)
756                || (entry.0 == current.0
757                    && entry.1 == current.1
758                    && entry.2 == current.2
759                    && entry.3 < current.3)
760        }) {
761            best = Some(entry);
762        }
763    }
764
765    best.map(|item| item.4).unwrap_or_else(|| "\n".to_owned())
766}
767
768fn render_full_result_section(
769    graph: &GraphFile,
770    query: &str,
771    nodes: &[ScoredNode<'_>],
772    total_available: usize,
773    debug_score: bool,
774) -> String {
775    let mut lines = vec![render_result_header(query, nodes.len(), total_available)];
776    for node in nodes {
777        lines.push(render_scored_node_block(
778            graph,
779            node,
780            true,
781            debug_score,
782            Some(query),
783        ));
784    }
785    push_limit_omission_line(&mut lines, nodes.len(), total_available);
786    lines.join("\n")
787}
788
789fn render_result_header(query: &str, shown: usize, total: usize) -> String {
790    let query = escape_cli_text(query);
791    if shown < total {
792        format!("? {query} ({shown}/{total})")
793    } else {
794        format!("? {query} ({total})")
795    }
796}
797
/// Appends a `... N more nodes omitted by limit` line when `total` exceeds
/// `shown`; does nothing otherwise.
fn push_limit_omission_line(lines: &mut Vec<String>, shown: usize, total: usize) {
    if total <= shown {
        return;
    }
    let omitted = total - shown;
    lines.push(format!("... {omitted} more nodes omitted by limit"));
}
804
/// Returns `true` when `rendered` contains at most `target` characters
/// (Unicode scalar values, not bytes).
fn fits_target_chars(rendered: &str, target: usize) -> bool {
    // A string has more than `target` chars exactly when a char exists at index
    // `target`; `nth` stops there instead of counting the whole string.
    rendered.chars().nth(target).is_none()
}
808
809fn render_single_node_candidate(
810    graph: &GraphFile,
811    node: &Node,
812    depth: usize,
813    detail: DetailLevel,
814    edge_cap: usize,
815) -> String {
816    let lines = render_single_node_candidate_lines(graph, node, depth, detail, edge_cap, None);
817    format!("{}\n", lines.join("\n"))
818}
819
820fn render_single_result_candidate(
821    graph: &GraphFile,
822    query: &str,
823    header: &str,
824    node: &ScoredNode<'_>,
825    total_available: usize,
826    depth: usize,
827    detail: DetailLevel,
828    edge_cap: usize,
829    full: bool,
830    debug_score: bool,
831) -> String {
832    let mut lines = vec![header.to_owned()];
833    if full {
834        lines.push(render_scored_node_block(
835            graph,
836            node,
837            true,
838            debug_score,
839            Some(query),
840        ));
841    } else {
842        lines.extend(render_scored_node_candidate_lines(
843            graph,
844            query,
845            node,
846            depth,
847            detail,
848            edge_cap,
849            debug_score,
850        ));
851    }
852    push_limit_omission_line(&mut lines, 1, total_available);
853    format!("{}\n", lines.join("\n"))
854}
855
856fn render_single_node_candidate_lines(
857    graph: &GraphFile,
858    node: &Node,
859    depth: usize,
860    detail: DetailLevel,
861    edge_cap: usize,
862    query: Option<&str>,
863) -> Vec<String> {
864    let mut lines = render_node_lines_with_edges(graph, node, detail, edge_cap, query);
865    if depth > 0 {
866        lines.extend(render_neighbor_layers(graph, node, depth, detail));
867    }
868    lines
869}
870
871fn render_scored_node_candidate_lines(
872    graph: &GraphFile,
873    query: &str,
874    node: &ScoredNode<'_>,
875    depth: usize,
876    detail: DetailLevel,
877    edge_cap: usize,
878    debug_score: bool,
879) -> Vec<String> {
880    let mut lines = vec![format!("score: {}", node.score)];
881    if debug_score {
882        lines.push(render_score_debug_line(node));
883    }
884    lines.extend(render_single_node_candidate_lines(
885        graph,
886        node.node,
887        depth,
888        detail,
889        edge_cap,
890        Some(query),
891    ));
892    lines
893}
894
895fn render_scored_node_block(
896    graph: &GraphFile,
897    node: &ScoredNode<'_>,
898    full: bool,
899    debug_score: bool,
900    query: Option<&str>,
901) -> String {
902    if debug_score {
903        format!(
904            "score: {}\n{}\n{}",
905            node.score,
906            render_score_debug_line(node),
907            render_node_block_with_query(graph, node.node, full, query)
908        )
909    } else {
910        format!(
911            "score: {}\n{}",
912            node.score,
913            render_node_block_with_query(graph, node.node, full, query)
914        )
915    }
916}
917
918fn render_score_debug_line(node: &ScoredNode<'_>) -> String {
919    format!(
920        "score_debug: raw_relevance={:.3} normalized_relevance={} lexical_boost={} feedback_boost={} importance_boost={} authority_raw={} authority_applied={} authority_cap={}",
921        node.breakdown.raw_relevance,
922        node.breakdown.normalized_relevance,
923        node.breakdown.lexical_boost,
924        node.breakdown.feedback_boost,
925        node.breakdown.importance_boost,
926        node.breakdown.authority_raw,
927        node.breakdown.authority_applied,
928        node.breakdown.authority_cap,
929    )
930}
931
932fn render_neighbor_layers(
933    graph: &GraphFile,
934    root: &Node,
935    max_depth: usize,
936    detail: DetailLevel,
937) -> Vec<String> {
938    let mut out = Vec::new();
939    let mut seen: HashSet<String> = HashSet::from([root.id.clone()]);
940    let mut queue: VecDeque<(String, usize)> = VecDeque::from([(root.id.clone(), 0usize)]);
941    let mut layers: Vec<Vec<&Node>> = vec![Vec::new(); max_depth + 1];
942
943    while let Some((node_id, depth)) = queue.pop_front() {
944        if depth >= max_depth {
945            continue;
946        }
947        for incident in incident_edges(graph, &node_id) {
948            if seen.insert(incident.related.id.clone()) {
949                let next_depth = depth + 1;
950                if next_depth <= max_depth {
951                    layers[next_depth].push(incident.related);
952                    queue.push_back((incident.related.id.clone(), next_depth));
953                }
954            }
955        }
956    }
957
958    for depth in 1..=max_depth {
959        if layers[depth].is_empty() {
960            continue;
961        }
962        let cap = match detail {
963            DetailLevel::Rich => 6,
964            DetailLevel::Compact => 4,
965            DetailLevel::Minimal => 3,
966        };
967        let shown = layers[depth].len().min(cap);
968        out.push(format!(
969            "depth {depth}: {shown}/{} neighbors",
970            layers[depth].len()
971        ));
972        for node in layers[depth].iter().take(shown) {
973            out.extend(render_node_identity_lines(node, detail));
974        }
975        if layers[depth].len() > shown {
976            out.push(format!(
977                "... +{} more neighbors omitted",
978                layers[depth].len() - shown
979            ));
980        }
981    }
982
983    out
984}
985
986fn render_node_lines_with_edges(
987    graph: &GraphFile,
988    node: &Node,
989    detail: DetailLevel,
990    edge_cap: usize,
991    query: Option<&str>,
992) -> Vec<String> {
993    let mut lines = render_node_identity_lines(node, detail);
994    lines.extend(render_node_link_lines(graph, node, edge_cap, query));
995    lines
996}
997
998fn render_node_identity_lines(node: &Node, detail: DetailLevel) -> Vec<String> {
999    let mut lines = Vec::new();
1000    let display_name = node_display_name(node);
1001    match detail {
1002        DetailLevel::Rich => {
1003            lines.push(format!(
1004                "# {} | {} [{}]",
1005                node.id,
1006                escape_cli_text(&display_name),
1007                node.r#type
1008            ));
1009            if !node.properties.alias.is_empty() {
1010                lines.push(format!(
1011                    "aka: {}",
1012                    node.properties
1013                        .alias
1014                        .iter()
1015                        .map(|alias| escape_cli_text(alias))
1016                        .collect::<Vec<_>>()
1017                        .join(", ")
1018                ));
1019            }
1020            push_description_line(&mut lines, &node.properties.description, None);
1021            let shown_facts = node.properties.key_facts.len().min(3);
1022            for fact in node.properties.key_facts.iter().take(shown_facts) {
1023                lines.push(format!("- {}", escape_cli_text(fact)));
1024            }
1025            let omitted = node.properties.key_facts.len().saturating_sub(shown_facts);
1026            if omitted > 0 {
1027                lines.push(format!("... {omitted} more facts omitted"));
1028            }
1029        }
1030        DetailLevel::Compact => {
1031            lines.push(format!(
1032                "# {} | {} [{}]",
1033                node.id,
1034                escape_cli_text(&display_name),
1035                node.r#type
1036            ));
1037            push_description_line(&mut lines, &node.properties.description, Some(140));
1038            if let Some(fact) = node.properties.key_facts.first() {
1039                lines.push(format!("- {}", escape_cli_text(fact)));
1040            }
1041        }
1042        DetailLevel::Minimal => {
1043            lines.push(format!(
1044                "# {} | {} [{}]",
1045                node.id,
1046                escape_cli_text(&display_name),
1047                node.r#type
1048            ));
1049        }
1050    }
1051    lines
1052}
1053
1054fn node_display_name(node: &Node) -> String {
1055    if !node.name.trim().is_empty() {
1056        return node.name.clone();
1057    }
1058    let raw = node
1059        .id
1060        .split_once(':')
1061        .map(|(_, suffix)| {
1062            suffix
1063                .rsplit_once(':')
1064                .map(|(name, _)| name)
1065                .unwrap_or(suffix)
1066        })
1067        .unwrap_or(node.id.as_str())
1068        .to_owned();
1069    unescape_generated_name(&raw)
1070}
1071
/// Decodes the `~` escapes in `value`: `~~` becomes `~` and `~c` becomes `:`.
///
/// Any other character following a `~` is kept together with the `~`, and a
/// trailing lone `~` is kept as-is.
fn unescape_generated_name(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    // Set while the previous character was an unconsumed `~`.
    let mut pending_tilde = false;
    for ch in value.chars() {
        if pending_tilde {
            pending_tilde = false;
            match ch {
                '~' => decoded.push('~'),
                'c' => decoded.push(':'),
                other => {
                    decoded.push('~');
                    decoded.push(other);
                }
            }
        } else if ch == '~' {
            pending_tilde = true;
        } else {
            decoded.push(ch);
        }
    }
    if pending_tilde {
        decoded.push('~');
    }
    decoded
}
1092
1093fn render_node_link_lines(
1094    graph: &GraphFile,
1095    node: &Node,
1096    edge_cap: usize,
1097    query: Option<&str>,
1098) -> Vec<String> {
1099    let mut incident = incident_edges(graph, &node.id);
1100    if let Some(query) = query {
1101        let query_terms = text_norm::expand_query_terms(query);
1102        if !query_terms.is_empty() {
1103            incident.sort_by(|left, right| {
1104                let right_relevance = incident_edge_query_relevance(right, &query_terms);
1105                let left_relevance = incident_edge_query_relevance(left, &query_terms);
1106                right_relevance
1107                    .cmp(&left_relevance)
1108                    .then_with(|| incident_edge_default_cmp(left, right))
1109            });
1110        }
1111    }
1112    if incident.is_empty() {
1113        return Vec::new();
1114    }
1115
1116    let mut lines = Vec::new();
1117    if incident.len() > 12 {
1118        lines.push(format!("links: {} total", incident.len()));
1119        let (out_summary, in_summary) = summarize_relations(&incident);
1120        if !out_summary.is_empty() {
1121            lines.push(format!("out: {out_summary}"));
1122        }
1123        if !in_summary.is_empty() {
1124            lines.push(format!("in: {in_summary}"));
1125        }
1126    }
1127
1128    let shown = incident.len().min(edge_cap);
1129    for edge in incident.into_iter().take(shown) {
1130        let prefix = if edge.incoming { "<-" } else { "->" };
1131        lines.extend(render_edge_lines(prefix, edge.edge, edge.related, false));
1132    }
1133    if edge_cap > 0 && incident_count(graph, &node.id) > shown {
1134        lines.push(format!(
1135            "... {} more links omitted",
1136            incident_count(graph, &node.id) - shown
1137        ));
1138    }
1139    lines
1140}
1141
1142fn incident_count(graph: &GraphFile, node_id: &str) -> usize {
1143    graph
1144        .edges
1145        .iter()
1146        .filter(|edge| edge.source_id == node_id || edge.target_id == node_id)
1147        .count()
1148}
1149
/// An edge touching a focus node, paired with the node at its other end.
struct IncidentEdge<'a> {
    /// The graph edge itself.
    edge: &'a Edge,
    /// The node on the far side of `edge`, as seen from the focus node.
    related: &'a Node,
    /// `true` when the focus node is the edge's target, `false` when it is the source.
    incoming: bool,
}
1155
1156fn incident_edges<'a>(graph: &'a GraphFile, node_id: &str) -> Vec<IncidentEdge<'a>> {
1157    let mut edges = Vec::new();
1158    for edge in &graph.edges {
1159        if edge.source_id == node_id {
1160            if let Some(related) = graph.node_by_id(&edge.target_id) {
1161                edges.push(IncidentEdge {
1162                    edge,
1163                    related,
1164                    incoming: false,
1165                });
1166            }
1167        } else if edge.target_id == node_id {
1168            if let Some(related) = graph.node_by_id(&edge.source_id) {
1169                edges.push(IncidentEdge {
1170                    edge,
1171                    related,
1172                    incoming: true,
1173                });
1174            }
1175        }
1176    }
1177    edges.sort_by(incident_edge_default_cmp);
1178    edges
1179}
1180
1181fn incident_edge_default_cmp(
1182    left: &IncidentEdge<'_>,
1183    right: &IncidentEdge<'_>,
1184) -> std::cmp::Ordering {
1185    right
1186        .related
1187        .properties
1188        .importance
1189        .partial_cmp(&left.related.properties.importance)
1190        .unwrap_or(std::cmp::Ordering::Equal)
1191        .then_with(|| left.edge.relation.cmp(&right.edge.relation))
1192        .then_with(|| left.related.id.cmp(&right.related.id))
1193}
1194
1195fn incident_edge_query_relevance(edge: &IncidentEdge<'_>, query_terms: &[String]) -> i64 {
1196    if query_terms.is_empty() {
1197        return 0;
1198    }
1199    let related = edge.related;
1200    let mut score = 0;
1201    score += query_overlap_score(&related.id, query_terms, 6);
1202    score += query_overlap_score(&related.name, query_terms, 5);
1203    score += query_overlap_score(&related.properties.description, query_terms, 2);
1204    score += query_overlap_score(&edge.edge.relation, query_terms, 2);
1205    score += query_overlap_score(&edge.edge.properties.detail, query_terms, 2);
1206    for alias in &related.properties.alias {
1207        score += query_overlap_score(alias, query_terms, 4);
1208    }
1209    score
1210}
1211
1212fn query_overlap_score(value: &str, query_terms: &[String], weight: i64) -> i64 {
1213    if value.is_empty() || query_terms.is_empty() {
1214        return 0;
1215    }
1216    let value_terms: HashSet<String> = tokenize(value).into_iter().collect();
1217    if value_terms.is_empty() {
1218        return 0;
1219    }
1220    let matches = query_terms
1221        .iter()
1222        .filter(|term| value_terms.contains(term.as_str()))
1223        .count() as i64;
1224    matches * weight
1225}
1226
1227fn summarize_relations(edges: &[IncidentEdge<'_>]) -> (String, String) {
1228    let mut out: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
1229    let mut incoming: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
1230
1231    for edge in edges {
1232        let bucket = if edge.incoming {
1233            &mut incoming
1234        } else {
1235            &mut out
1236        };
1237        *bucket.entry(edge.edge.relation.clone()).or_insert(0) += 1;
1238    }
1239
1240    (join_relation_counts(&out), join_relation_counts(&incoming))
1241}
1242
/// Formats the first three entries of `counts`, in key order, as
/// `"<relation> x<count>"` items separated by `", "`.
///
/// Returns an empty string for an empty map.
fn join_relation_counts(counts: &std::collections::BTreeMap<String, usize>) -> String {
    let mut summary = String::new();
    for (position, (relation, count)) in counts.iter().enumerate() {
        if position == 3 {
            break;
        }
        if position > 0 {
            summary.push_str(", ");
        }
        summary.push_str(&format!("{relation} x{count}"));
    }
    summary
}
1251
1252fn render_node_block(graph: &GraphFile, node: &Node, full: bool) -> String {
1253    render_node_block_with_query(graph, node, full, None)
1254}
1255
1256fn render_node_block_with_query(
1257    graph: &GraphFile,
1258    node: &Node,
1259    full: bool,
1260    query: Option<&str>,
1261) -> String {
1262    let mut lines = Vec::new();
1263    let display_name = node_display_name(node);
1264    let generated = crate::validate::is_generated_node_type(&node.r#type);
1265    lines.push(format!(
1266        "# {} | {} [{}]",
1267        node.id,
1268        escape_cli_text(&display_name),
1269        node.r#type
1270    ));
1271
1272    if !node.properties.alias.is_empty() {
1273        lines.push(format!(
1274            "aka: {}",
1275            node.properties
1276                .alias
1277                .iter()
1278                .map(|alias| escape_cli_text(alias))
1279                .collect::<Vec<_>>()
1280                .join(", ")
1281        ));
1282    }
1283    push_description_line(
1284        &mut lines,
1285        &node.properties.description,
1286        if full { None } else { Some(200) },
1287    );
1288    if full && !generated {
1289        if !node.properties.domain_area.is_empty() {
1290            lines.push(format!(
1291                "domain_area: {}",
1292                escape_cli_text(&node.properties.domain_area)
1293            ));
1294        }
1295        if let Some(scan) = node.properties.scan {
1296            lines.push(format!("scan: {scan}"));
1297        }
1298        if let Some(scan_ignore_unknown) = node.properties.scan_ignore_unknown {
1299            lines.push(format!("scan_ignore_unknown: {scan_ignore_unknown}"));
1300        }
1301        if !node.properties.provenance.is_empty() {
1302            lines.push(format!(
1303                "provenance: {}",
1304                escape_cli_text(&node.properties.provenance)
1305            ));
1306        }
1307        if let Some(confidence) = node.properties.confidence {
1308            lines.push(format!("confidence: {confidence}"));
1309        }
1310        lines.push(format!("importance: {}", node.properties.importance));
1311        if !node.properties.created_at.is_empty() {
1312            lines.push(format!("created_at: {}", node.properties.created_at));
1313        }
1314    }
1315
1316    let facts_to_show = if full {
1317        node.properties.key_facts.len()
1318    } else {
1319        node.properties.key_facts.len().min(2)
1320    };
1321    for fact in node.properties.key_facts.iter().take(facts_to_show) {
1322        lines.push(format!("- {}", escape_cli_text(fact)));
1323    }
1324    let omitted = node
1325        .properties
1326        .key_facts
1327        .len()
1328        .saturating_sub(facts_to_show);
1329    if omitted > 0 {
1330        lines.push(format!("... {omitted} more facts omitted"));
1331    }
1332
1333    if full && !generated {
1334        if !node.source_files.is_empty() {
1335            lines.push(format!(
1336                "sources: {}",
1337                node.source_files
1338                    .iter()
1339                    .map(|source| escape_cli_text(source))
1340                    .collect::<Vec<_>>()
1341                    .join(", ")
1342            ));
1343        }
1344        push_feedback_lines(
1345            &mut lines,
1346            node.properties.feedback_score,
1347            node.properties.feedback_count,
1348            node.properties.feedback_last_ts_ms,
1349            None,
1350        );
1351    }
1352
1353    let attached_notes: Vec<_> = graph
1354        .notes
1355        .iter()
1356        .filter(|note| note.node_id == node.id)
1357        .collect();
1358    if full && !attached_notes.is_empty() {
1359        lines.push(format!("notes: {}", attached_notes.len()));
1360        for note in attached_notes {
1361            lines.extend(render_attached_note_lines(note));
1362        }
1363    }
1364
1365    for edge in outgoing_edges(graph, &node.id, full, query) {
1366        if let Some(target) = graph.node_by_id(&edge.target_id) {
1367            lines.extend(render_edge_lines("->", edge, target, full));
1368        }
1369    }
1370    for edge in incoming_edges(graph, &node.id, full, query) {
1371        if let Some(source) = graph.node_by_id(&edge.source_id) {
1372            lines.extend(render_edge_lines("<-", edge, source, full));
1373        }
1374    }
1375
1376    lines.join("\n")
1377}
1378
1379fn outgoing_edges<'a>(
1380    graph: &'a GraphFile,
1381    node_id: &str,
1382    full: bool,
1383    query: Option<&str>,
1384) -> Vec<&'a Edge> {
1385    let mut edges: Vec<&Edge> = graph
1386        .edges
1387        .iter()
1388        .filter(|edge| edge.source_id == node_id)
1389        .collect();
1390    if let Some(query) = query {
1391        let query_terms = text_norm::expand_query_terms(query);
1392        if !query_terms.is_empty() {
1393            edges.sort_by(|left, right| {
1394                let right_score = directed_edge_query_relevance(graph, right, false, &query_terms);
1395                let left_score = directed_edge_query_relevance(graph, left, false, &query_terms);
1396                right_score
1397                    .cmp(&left_score)
1398                    .then_with(|| left.relation.cmp(&right.relation))
1399                    .then_with(|| left.target_id.cmp(&right.target_id))
1400            });
1401        } else {
1402            edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1403        }
1404    } else {
1405        edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1406    }
1407    if !full {
1408        edges.truncate(3);
1409    }
1410    edges
1411}
1412
1413fn incoming_edges<'a>(
1414    graph: &'a GraphFile,
1415    node_id: &str,
1416    full: bool,
1417    query: Option<&str>,
1418) -> Vec<&'a Edge> {
1419    let mut edges: Vec<&Edge> = graph
1420        .edges
1421        .iter()
1422        .filter(|edge| edge.target_id == node_id)
1423        .collect();
1424    if let Some(query) = query {
1425        let query_terms = text_norm::expand_query_terms(query);
1426        if !query_terms.is_empty() {
1427            edges.sort_by(|left, right| {
1428                let right_score = directed_edge_query_relevance(graph, right, true, &query_terms);
1429                let left_score = directed_edge_query_relevance(graph, left, true, &query_terms);
1430                right_score
1431                    .cmp(&left_score)
1432                    .then_with(|| left.relation.cmp(&right.relation))
1433                    .then_with(|| left.source_id.cmp(&right.source_id))
1434            });
1435        } else {
1436            edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1437        }
1438    } else {
1439        edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1440    }
1441    if !full {
1442        edges.truncate(3);
1443    }
1444    edges
1445}
1446
1447fn directed_edge_query_relevance(
1448    graph: &GraphFile,
1449    edge: &Edge,
1450    incoming: bool,
1451    query_terms: &[String],
1452) -> i64 {
1453    let related = if incoming {
1454        graph.node_by_id(&edge.source_id)
1455    } else {
1456        graph.node_by_id(&edge.target_id)
1457    };
1458    let mut score = query_overlap_score(&edge.relation, query_terms, 2)
1459        + query_overlap_score(&edge.properties.detail, query_terms, 2);
1460    if let Some(node) = related {
1461        score += query_overlap_score(&node.id, query_terms, 6);
1462        score += query_overlap_score(&node.name, query_terms, 5);
1463        score += query_overlap_score(&node.properties.description, query_terms, 2);
1464        for alias in &node.properties.alias {
1465            score += query_overlap_score(alias, query_terms, 4);
1466        }
1467    }
1468    score
1469}
1470
1471fn render_edge_lines(prefix: &str, edge: &Edge, related: &Node, full: bool) -> Vec<String> {
1472    let (arrow, relation) = if edge.relation.starts_with("NOT_") {
1473        (
1474            format!("{prefix}!"),
1475            edge.relation.trim_start_matches("NOT_"),
1476        )
1477    } else {
1478        (prefix.to_owned(), edge.relation.as_str())
1479    };
1480
1481    let mut line = format!(
1482        "{arrow} {relation} | {} | {}",
1483        related.id,
1484        escape_cli_text(&related.name)
1485    );
1486    if !edge.properties.detail.is_empty() {
1487        line.push_str(" | ");
1488        let detail = escape_cli_text(&edge.properties.detail);
1489        if full {
1490            line.push_str(&detail);
1491        } else {
1492            line.push_str(&truncate(&detail, 80));
1493        }
1494    }
1495    let mut lines = vec![line];
1496    if full {
1497        push_feedback_lines(
1498            &mut lines,
1499            edge.properties.feedback_score,
1500            edge.properties.feedback_count,
1501            edge.properties.feedback_last_ts_ms,
1502            Some("edge_"),
1503        );
1504        if !edge.properties.valid_from.is_empty() {
1505            lines.push(format!("edge_valid_from: {}", edge.properties.valid_from));
1506        }
1507        if !edge.properties.valid_to.is_empty() {
1508            lines.push(format!("edge_valid_to: {}", edge.properties.valid_to));
1509        }
1510    }
1511    lines
1512}
1513
/// Shortens `value` to at most `max_len` characters (Unicode scalar values).
///
/// Text that already fits is returned unchanged. Longer text is cut and ends
/// in `...`, with the ellipsis counted towards `max_len`. When `max_len` is
/// too small to hold the ellipsis, the text is cut hard to `max_len`
/// characters so the result never exceeds the limit.
fn truncate(value: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    // No character at index `max_len` means the text already fits; this also
    // avoids walking the full string when it is much longer than the limit.
    if value.chars().nth(max_len).is_none() {
        return value.to_owned();
    }
    if max_len < ELLIPSIS.len() {
        return value.chars().take(max_len).collect();
    }
    let mut shortened: String = value.chars().take(max_len - ELLIPSIS.len()).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}
1522
/// Escapes `value` for single-line output.
///
/// Backslash, newline, carriage return and tab are replaced by the
/// two-character sequences `\\`, `\n`, `\r` and `\t`; every other character
/// is copied unchanged.
fn escape_cli_text(value: &str) -> String {
    let mut escaped = String::new();
    for ch in value.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            _ => {
                escaped.push(ch);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
1536
1537fn push_description_line(lines: &mut Vec<String>, description: &str, max_len: Option<usize>) {
1538    if description.is_empty() {
1539        return;
1540    }
1541    let escaped = escape_cli_text(description);
1542    let rendered = match max_len {
1543        Some(limit) => truncate(&escaped, limit),
1544        None => escaped,
1545    };
1546    lines.push(format!("desc: {rendered}"));
1547}
1548
/// Appends feedback lines to `lines`, each key prefixed with `prefix` (empty
/// when `None`).
///
/// `feedback_score` is written when `score` is not equal to zero,
/// `feedback_count` when `count` is non-zero, and `feedback_last_ts_ms` when
/// a timestamp is present, in that order.
fn push_feedback_lines(
    lines: &mut Vec<String>,
    score: f64,
    count: u64,
    last_ts_ms: Option<u64>,
    prefix: Option<&str>,
) {
    let label = prefix.unwrap_or_default();
    let score_line = (score != 0.0).then(|| format!("{label}feedback_score: {score}"));
    let count_line = (count != 0).then(|| format!("{label}feedback_count: {count}"));
    let timestamp_line = last_ts_ms.map(|ts| format!("{label}feedback_last_ts_ms: {ts}"));
    lines.extend(
        score_line
            .into_iter()
            .chain(count_line)
            .chain(timestamp_line),
    );
}
1567
1568fn render_attached_note_lines(note: &crate::graph::Note) -> Vec<String> {
1569    let mut lines = vec![format!("! {}", note.id)];
1570    if !note.body.is_empty() {
1571        lines.push(format!("note_body: {}", escape_cli_text(&note.body)));
1572    }
1573    if !note.tags.is_empty() {
1574        lines.push(format!(
1575            "note_tags: {}",
1576            note.tags
1577                .iter()
1578                .map(|tag| escape_cli_text(tag))
1579                .collect::<Vec<_>>()
1580                .join(", ")
1581        ));
1582    }
1583    if !note.author.is_empty() {
1584        lines.push(format!("note_author: {}", escape_cli_text(&note.author)));
1585    }
1586    if !note.created_at.is_empty() {
1587        lines.push(format!("note_created_at: {}", note.created_at));
1588    }
1589    if !note.provenance.is_empty() {
1590        lines.push(format!(
1591            "note_provenance: {}",
1592            escape_cli_text(&note.provenance)
1593        ));
1594    }
1595    if !note.source_files.is_empty() {
1596        lines.push(format!(
1597            "note_sources: {}",
1598            note.source_files
1599                .iter()
1600                .map(|source| escape_cli_text(source))
1601                .collect::<Vec<_>>()
1602                .join(", ")
1603        ));
1604    }
1605    lines
1606}
1607
1608fn find_matches_with_index<'a>(
1609    graph: &'a GraphFile,
1610    query: &str,
1611    limit: usize,
1612    include_features: bool,
1613    include_metadata: bool,
1614    mode: FindMode,
1615    index: Option<&Bm25Index>,
1616    tune: Option<&FindTune>,
1617) -> Vec<ScoredNode<'a>> {
1618    let mut matches = find_all_matches_with_index(
1619        graph,
1620        query,
1621        include_features,
1622        include_metadata,
1623        mode,
1624        index,
1625        tune,
1626    );
1627    matches.truncate(limit);
1628    matches
1629}
1630
1631fn find_all_matches_with_index<'a>(
1632    graph: &'a GraphFile,
1633    query: &str,
1634    include_features: bool,
1635    include_metadata: bool,
1636    mode: FindMode,
1637    index: Option<&Bm25Index>,
1638    tune: Option<&FindTune>,
1639) -> Vec<ScoredNode<'a>> {
1640    let context = FindQueryContext::build(graph);
1641    let rewritten_query = rewrite_query(query);
1642    let fuzzy_query = if rewritten_query.is_empty() {
1643        query.to_owned()
1644    } else {
1645        rewritten_query
1646    };
1647    let mut scored: Vec<ScoredNode<'a>> = match mode {
1648        FindMode::Fuzzy => {
1649            let pattern = Pattern::parse(&fuzzy_query, CaseMatching::Ignore, Normalization::Smart);
1650            let mut matcher = Matcher::new(Config::DEFAULT);
1651            let candidates = graph
1652                .nodes
1653                .iter()
1654                .filter(|node| node_is_searchable(node, include_features, include_metadata))
1655                .filter_map(|node| {
1656                    score_node(&context, node, &fuzzy_query, &pattern, &mut matcher).map(|score| {
1657                        RawCandidate {
1658                            node,
1659                            raw_relevance: score as f64,
1660                            lexical_boost: 0,
1661                        }
1662                    })
1663                })
1664                .collect();
1665            compose_scores(candidates)
1666        }
1667        FindMode::Bm25 => compose_scores(score_bm25_raw(
1668            graph,
1669            &context,
1670            &fuzzy_query,
1671            include_features,
1672            include_metadata,
1673            index,
1674        )),
1675        FindMode::Hybrid => compose_scores(score_hybrid_raw(
1676            graph,
1677            &context,
1678            &fuzzy_query,
1679            include_features,
1680            include_metadata,
1681            index,
1682            tune.copied().unwrap_or_default(),
1683        )),
1684    };
1685
1686    scored.sort_by(|left, right| {
1687        right
1688            .score
1689            .cmp(&left.score)
1690            .then_with(|| left.node.id.cmp(&right.node.id))
1691    });
1692    let mut seen_ids = HashSet::new();
1693    scored.retain(|item| {
1694        let key = crate::validate::normalize_node_id(&item.node.id).to_ascii_lowercase();
1695        seen_ids.insert(key)
1696    });
1697    scored
1698}
1699
1700fn compose_scores<'a>(candidates: Vec<RawCandidate<'a>>) -> Vec<ScoredNode<'a>> {
1701    let max_raw = candidates
1702        .iter()
1703        .map(|candidate| candidate.raw_relevance)
1704        .fold(0.0f64, f64::max);
1705    let max_raw_log = max_raw.ln_1p();
1706
1707    candidates
1708        .into_iter()
1709        .filter_map(|candidate| {
1710            if candidate.raw_relevance <= 0.0 {
1711                return None;
1712            }
1713            let normalized_relevance = if max_raw_log > 0.0 {
1714                ((candidate.raw_relevance.ln_1p() / max_raw_log) * 1000.0).round() as i64
1715            } else {
1716                0
1717            };
1718            let feedback = feedback_boost(candidate.node);
1719            let importance = importance_boost(candidate.node);
1720            let authority_raw = feedback + importance;
1721            let relative_cap =
1722                ((normalized_relevance as f64) * SCORE_META_MAX_RATIO).round() as i64;
1723            let authority_cap = relative_cap.max(SCORE_META_MIN_CAP).min(SCORE_META_MAX_CAP);
1724            let authority_applied = authority_raw.clamp(-authority_cap, authority_cap);
1725            let final_score = normalized_relevance + authority_applied;
1726
1727            Some(ScoredNode {
1728                score: final_score,
1729                node: candidate.node,
1730                breakdown: ScoreBreakdown {
1731                    raw_relevance: candidate.raw_relevance,
1732                    normalized_relevance,
1733                    lexical_boost: candidate.lexical_boost,
1734                    feedback_boost: feedback,
1735                    importance_boost: importance,
1736                    authority_raw,
1737                    authority_applied,
1738                    authority_cap,
1739                },
1740            })
1741        })
1742        .collect()
1743}
1744
1745fn feedback_boost(node: &Node) -> i64 {
1746    let count = node.properties.feedback_count as f64;
1747    if count <= 0.0 {
1748        return 0;
1749    }
1750    let avg = node.properties.feedback_score / count;
1751    let confidence = (count.ln_1p() / 3.0).min(1.0);
1752    let scaled = avg * 200.0 * confidence;
1753    scaled.clamp(-300.0, 300.0).round() as i64
1754}
1755
1756fn importance_boost(node: &Node) -> i64 {
1757    let normalized_importance = if (0.0..=1.0).contains(&node.properties.importance) {
1758        node.properties.importance
1759    } else if (1.0..=6.0).contains(&node.properties.importance) {
1760        (node.properties.importance - 1.0) / 5.0
1761    } else {
1762        node.properties.importance.clamp(0.0, 1.0)
1763    };
1764    let normalized = (normalized_importance - IMPORTANCE_NEUTRAL) * 2.0;
1765    (normalized * IMPORTANCE_MAX_ABS_BOOST).round() as i64
1766}
1767
1768fn score_bm25_raw<'a>(
1769    graph: &'a GraphFile,
1770    context: &FindQueryContext<'a>,
1771    query: &str,
1772    include_features: bool,
1773    include_metadata: bool,
1774    index: Option<&Bm25Index>,
1775) -> Vec<RawCandidate<'a>> {
1776    let terms = text_norm::expand_query_terms(query);
1777    if terms.is_empty() {
1778        return Vec::new();
1779    }
1780
1781    if let Some(idx) = index {
1782        let results = idx.search(&terms, graph);
1783        return results
1784            .into_iter()
1785            .filter_map(|(node_id, score)| {
1786                let node = graph.node_by_id(&node_id)?;
1787                if !node_is_searchable(node, include_features, include_metadata) {
1788                    return None;
1789                }
1790                let self_terms = node_self_document_terms(context, node);
1791                let neighbor_score =
1792                    best_neighbor_bm25_score_with_index(context, node, &terms, idx);
1793                let base_score = combine_bm25_components(node, score as f64, neighbor_score);
1794                if base_score <= 0.0 {
1795                    return None;
1796                }
1797                let lexical_boost = bm25_lexical_boost_with_idf(&terms, &self_terms, |term| {
1798                    idx.idf.get(term).copied().unwrap_or(0.0) as f64
1799                });
1800                let proximity_boost = bm25_proximity_boost(context, node, &terms);
1801                Some(RawCandidate {
1802                    node,
1803                    raw_relevance: base_score * 100.0
1804                        + lexical_boost as f64
1805                        + proximity_boost as f64,
1806                    lexical_boost: lexical_boost + proximity_boost,
1807                })
1808            })
1809            .collect();
1810    }
1811
1812    let docs: Vec<(&'a Node, Vec<String>)> = graph
1813        .nodes
1814        .iter()
1815        .filter(|node| node_is_searchable(node, include_features, include_metadata))
1816        .map(|node| (node, node_self_document_terms(context, node)))
1817        .collect();
1818
1819    if docs.is_empty() {
1820        return Vec::new();
1821    }
1822
1823    let mut df: HashMap<String, usize> = HashMap::new();
1824    for term in &terms {
1825        let mut count = 0usize;
1826        for (_, tokens) in &docs {
1827            if tokens.iter().any(|t| t == term) {
1828                count += 1;
1829            }
1830        }
1831        df.insert(term.clone(), count);
1832    }
1833
1834    let total_docs = docs.len() as f64;
1835    let avgdl = docs
1836        .iter()
1837        .map(|(_, tokens)| tokens.len() as f64)
1838        .sum::<f64>()
1839        / total_docs.max(1.0);
1840
1841    let mut idf_by_term: HashMap<String, f64> = HashMap::new();
1842    for term in &terms {
1843        let df_t = *df.get(term).unwrap_or(&0) as f64;
1844        let idf = (1.0 + (total_docs - df_t + 0.5) / (df_t + 0.5)).ln();
1845        idf_by_term.insert(term.clone(), idf);
1846    }
1847
1848    let mut scored = Vec::new();
1849
1850    for (node, self_terms) in docs {
1851        let self_score = bm25_document_score(&terms, &self_terms, &idf_by_term, avgdl);
1852        let neighbor_score = best_neighbor_bm25_score(context, node, &terms, &idf_by_term, avgdl);
1853        let base_score = combine_bm25_components(node, self_score, neighbor_score);
1854        if base_score <= 0.0 {
1855            continue;
1856        }
1857        let lexical_boost = bm25_lexical_boost_with_idf(&terms, &self_terms, |term| {
1858            idf_by_term.get(term).copied().unwrap_or(0.0)
1859        });
1860        let proximity_boost = bm25_proximity_boost(context, node, &terms);
1861        scored.push(RawCandidate {
1862            node,
1863            raw_relevance: base_score * 100.0 + lexical_boost as f64 + proximity_boost as f64,
1864            lexical_boost: lexical_boost + proximity_boost,
1865        });
1866    }
1867
1868    scored
1869}
1870
1871fn score_hybrid_raw<'a>(
1872    graph: &'a GraphFile,
1873    context: &FindQueryContext<'a>,
1874    query: &str,
1875    include_features: bool,
1876    include_metadata: bool,
1877    index: Option<&Bm25Index>,
1878    tune: FindTune,
1879) -> Vec<RawCandidate<'a>> {
1880    let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
1881    let mut matcher = Matcher::new(Config::DEFAULT);
1882
1883    let mut fuzzy_raw = HashMap::new();
1884    for node in graph
1885        .nodes
1886        .iter()
1887        .filter(|node| node_is_searchable(node, include_features, include_metadata))
1888    {
1889        if let Some(score) = score_node(context, node, query, &pattern, &mut matcher) {
1890            fuzzy_raw.insert(node.id.as_str(), score as f64);
1891        }
1892    }
1893
1894    let bm25_candidates = score_bm25_raw(
1895        graph,
1896        context,
1897        query,
1898        include_features,
1899        include_metadata,
1900        index,
1901    );
1902    let mut bm25_raw = HashMap::new();
1903    let mut lexical_boost = HashMap::new();
1904    for candidate in bm25_candidates {
1905        bm25_raw.insert(candidate.node.id.as_str(), candidate.raw_relevance);
1906        lexical_boost.insert(candidate.node.id.as_str(), candidate.lexical_boost);
1907    }
1908
1909    let fuzzy_norm = normalize_raw_scores(&fuzzy_raw);
1910    let bm25_norm = normalize_raw_scores(&bm25_raw);
1911    let total_weight = (tune.bm25 + tune.fuzzy).max(0.0001);
1912
1913    graph
1914        .nodes
1915        .iter()
1916        .filter(|node| node_is_searchable(node, include_features, include_metadata))
1917        .filter_map(|node| {
1918            let f = fuzzy_norm.get(node.id.as_str()).copied().unwrap_or(0.0);
1919            let b = bm25_norm.get(node.id.as_str()).copied().unwrap_or(0.0);
1920            let combined = ((tune.fuzzy * f) + (tune.bm25 * b)) / total_weight;
1921            if combined <= 0.0 {
1922                return None;
1923            }
1924            Some(RawCandidate {
1925                node,
1926                raw_relevance: combined * 1000.0,
1927                lexical_boost: lexical_boost.get(node.id.as_str()).copied().unwrap_or(0),
1928            })
1929        })
1930        .collect()
1931}
1932
/// Maps every raw score onto `0.0..=1.0` by dividing its `ln(1 + x)` by the
/// `ln(1 + x)` of the largest score in `raw`.
///
/// When that largest value is zero or below (or `raw` is empty), every entry
/// maps to 0.0.
fn normalize_raw_scores<'a>(raw: &'a HashMap<&'a str, f64>) -> HashMap<&'a str, f64> {
    let mut peak = 0.0f64;
    for &value in raw.values() {
        peak = f64::max(peak, value);
    }
    let peak_log = peak.ln_1p();

    let mut normalized = HashMap::with_capacity(raw.len());
    for (&id, &value) in raw {
        let ratio = if peak_log > 0.0 {
            value.ln_1p() / peak_log
        } else {
            0.0
        };
        normalized.insert(id, ratio.clamp(0.0, 1.0));
    }
    normalized
}
1947
1948fn node_is_searchable(node: &Node, include_features: bool, include_metadata: bool) -> bool {
1949    (include_features || node.r#type != "Feature") && (include_metadata || node.r#type != "^")
1950}
1951
1952fn node_self_document_terms(context: &FindQueryContext<'_>, node: &Node) -> Vec<String> {
1953    let mut tokens = Vec::new();
1954    push_terms(&mut tokens, &node.id, BM25_ID_WEIGHT);
1955    push_terms(&mut tokens, &node.name, BM25_NAME_WEIGHT);
1956    push_terms(
1957        &mut tokens,
1958        &node.properties.description,
1959        BM25_DESCRIPTION_WEIGHT,
1960    );
1961    for alias in &node.properties.alias {
1962        push_terms(&mut tokens, alias, BM25_ALIAS_WEIGHT);
1963    }
1964    for fact in &node.properties.key_facts {
1965        push_terms(&mut tokens, fact, BM25_FACT_WEIGHT);
1966    }
1967    for note in context.notes_for(&node.id) {
1968        push_terms(&mut tokens, &note.body, BM25_NOTE_BODY_WEIGHT);
1969        for tag in &note.tags {
1970            push_terms(&mut tokens, tag, BM25_NOTE_TAG_WEIGHT);
1971        }
1972    }
1973    tokens
1974}
1975
1976fn neighbor_document_terms(neighbor: &Node) -> Vec<String> {
1977    let mut tokens = Vec::new();
1978    push_terms(&mut tokens, &neighbor.id, BM25_NEIGHBOR_WEIGHT);
1979    push_terms(&mut tokens, &neighbor.name, BM25_NEIGHBOR_WEIGHT);
1980    push_terms(
1981        &mut tokens,
1982        &neighbor.properties.description,
1983        BM25_NEIGHBOR_WEIGHT,
1984    );
1985    for alias in &neighbor.properties.alias {
1986        push_terms(&mut tokens, alias, BM25_NEIGHBOR_WEIGHT);
1987    }
1988    tokens
1989}
1990
1991fn fact_volume_normalizer(node: &Node) -> f64 {
1992    let fact_chars = node
1993        .properties
1994        .key_facts
1995        .iter()
1996        .map(|fact| fact.chars().count())
1997        .sum::<usize>() as f64;
1998    if fact_chars <= 0.0 {
1999        return 1.0;
2000    }
2001    let scaled = FACT_VOLUME_BASE_CHARS.sqrt() / fact_chars.sqrt();
2002    scaled.clamp(FACT_VOLUME_MIN_FACTOR, 1.0)
2003}
2004
2005fn bm25_document_score(
2006    query_terms: &[String],
2007    document_terms: &[String],
2008    idf_by_term: &HashMap<String, f64>,
2009    avgdl: f64,
2010) -> f64 {
2011    if query_terms.is_empty() || document_terms.is_empty() {
2012        return 0.0;
2013    }
2014    let dl = document_terms.len() as f64;
2015    if dl <= 0.0 {
2016        return 0.0;
2017    }
2018    let mut score = 0.0;
2019    for term in query_terms {
2020        let tf = document_terms.iter().filter(|token| *token == term).count() as f64;
2021        if tf <= 0.0 {
2022            continue;
2023        }
2024        let idf = idf_by_term.get(term).copied().unwrap_or(0.0);
2025        if idf <= 0.0 {
2026            continue;
2027        }
2028        let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl.max(1.0)));
2029        score += idf * (tf * (BM25_K1 + 1.0) / denom);
2030    }
2031    score
2032}
2033
2034fn best_neighbor_bm25_score(
2035    context: &FindQueryContext<'_>,
2036    node: &Node,
2037    query_terms: &[String],
2038    idf_by_term: &HashMap<String, f64>,
2039    avgdl: f64,
2040) -> f64 {
2041    context
2042        .neighbors_for(&node.id)
2043        .iter()
2044        .map(|neighbor| {
2045            let neighbor_terms = neighbor_document_terms(neighbor);
2046            bm25_document_score(query_terms, &neighbor_terms, idf_by_term, avgdl)
2047        })
2048        .fold(0.0f64, f64::max)
2049}
2050
2051fn best_neighbor_bm25_score_with_index(
2052    context: &FindQueryContext<'_>,
2053    node: &Node,
2054    query_terms: &[String],
2055    index: &Bm25Index,
2056) -> f64 {
2057    let avgdl = index.avg_doc_len as f64;
2058    context
2059        .neighbors_for(&node.id)
2060        .iter()
2061        .map(|neighbor| {
2062            let neighbor_terms = neighbor_document_terms(neighbor);
2063            let dl = neighbor_terms.len() as f64;
2064            if dl <= 0.0 {
2065                return 0.0;
2066            }
2067            let mut score = 0.0;
2068            for term in query_terms {
2069                let idf = index.idf.get(term).copied().unwrap_or(0.0) as f64;
2070                if idf <= 0.0 {
2071                    continue;
2072                }
2073                let tf = neighbor_terms.iter().filter(|token| *token == term).count() as f64;
2074                if tf <= 0.0 {
2075                    continue;
2076                }
2077                let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl.max(1.0)));
2078                score += idf * (tf * (BM25_K1 + 1.0) / denom);
2079            }
2080            score
2081        })
2082        .fold(0.0f64, f64::max)
2083}
2084
2085fn combine_bm25_components(node: &Node, self_score: f64, neighbor_score: f64) -> f64 {
2086    let combined =
2087        BM25_SELF_CONTEXT_WEIGHT * self_score + BM25_NEIGHBOR_CONTEXT_WEIGHT * neighbor_score;
2088    combined * fact_volume_normalizer(node)
2089}
2090
2091fn push_terms(target: &mut Vec<String>, value: &str, weight: usize) {
2092    if value.is_empty() {
2093        return;
2094    }
2095    let terms = tokenize(value);
2096    for _ in 0..weight {
2097        target.extend(terms.iter().cloned());
2098    }
2099}
2100
/// Splits `text` into search terms by delegating to `text_norm::tokenize`.
///
/// This is the tokenizer entry point used by `push_terms` and
/// `proximity_hits_in_text` in this file; the normalization rules themselves
/// live in the `text_norm` module.
fn tokenize(text: &str) -> Vec<String> {
    text_norm::tokenize(text)
}
2104
/// Rewrites `query` into its expanded form.
///
/// The terms returned by `text_norm::expand_query_terms` are joined with single
/// spaces. What the expansion adds or changes is defined in `text_norm`.
fn rewrite_query(query: &str) -> String {
    text_norm::expand_query_terms(query).join(" ")
}
2108
2109fn bm25_lexical_boost_with_idf<F>(
2110    query_terms: &[String],
2111    document_terms: &[String],
2112    idf_for: F,
2113) -> i64
2114where
2115    F: Fn(&str) -> f64,
2116{
2117    if query_terms.is_empty() || document_terms.is_empty() {
2118        return 0;
2119    }
2120    if query_terms.len() > 1 && contains_token_phrase(document_terms, query_terms) {
2121        return BM25_PHRASE_MATCH_BOOST;
2122    }
2123    let document_vocab: HashSet<&str> = document_terms.iter().map(String::as_str).collect();
2124    let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
2125    let mut total_idf = 0.0;
2126    let mut matched_idf = 0.0;
2127    let mut matched_terms = 0i64;
2128    for term in query_vocab {
2129        let idf = idf_for(term).max(0.0);
2130        total_idf += if idf > 0.0 { idf } else { 1.0 };
2131        if document_vocab.contains(term) {
2132            matched_terms += 1;
2133            matched_idf += if idf > 0.0 { idf } else { 1.0 };
2134        }
2135    }
2136    if matched_terms == 0 {
2137        return 0;
2138    }
2139    ((matched_idf / total_idf.max(1.0)) * BM25_TOKEN_MATCH_BOOST as f64).round() as i64
2140}
2141
2142fn bm25_proximity_boost(
2143    context: &FindQueryContext<'_>,
2144    node: &Node,
2145    query_terms: &[String],
2146) -> i64 {
2147    if query_terms.len() < 2 {
2148        return 0;
2149    }
2150    let mut best_span_hits = proximity_hits_in_text(&node.id, query_terms)
2151        .max(proximity_hits_in_text(&node.name, query_terms))
2152        .max(proximity_hits_in_text(
2153            &node.properties.description,
2154            query_terms,
2155        ));
2156    for alias in &node.properties.alias {
2157        best_span_hits = best_span_hits.max(proximity_hits_in_text(alias, query_terms));
2158    }
2159    for fact in &node.properties.key_facts {
2160        best_span_hits = best_span_hits.max(proximity_hits_in_text(fact, query_terms));
2161    }
2162    for note in context.notes_for(&node.id) {
2163        best_span_hits = best_span_hits.max(proximity_hits_in_text(&note.body, query_terms));
2164        for tag in &note.tags {
2165            best_span_hits = best_span_hits.max(proximity_hits_in_text(tag, query_terms));
2166        }
2167    }
2168    if best_span_hits < 2 {
2169        0
2170    } else {
2171        BM25_PROXIMITY_MATCH_BOOST + (best_span_hits as i64 - 2) * 20
2172    }
2173}
2174
2175fn proximity_hits_in_text(value: &str, query_terms: &[String]) -> usize {
2176    if value.is_empty() || query_terms.len() < 2 {
2177        return 0;
2178    }
2179    let tokens = tokenize(value);
2180    if tokens.len() < 2 {
2181        return 0;
2182    }
2183    let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
2184    let mut best = 0usize;
2185    for start in 0..tokens.len() {
2186        let end = (start + BM25_PROXIMITY_WINDOW_TOKENS).min(tokens.len());
2187        let mut seen: HashSet<&str> = HashSet::new();
2188        for token in &tokens[start..end] {
2189            if query_vocab.contains(token.as_str()) {
2190                seen.insert(token.as_str());
2191            }
2192        }
2193        best = best.max(seen.len());
2194    }
2195    best
2196}
2197
/// Reports whether `query_terms` occurs in `document_terms` as a contiguous,
/// in-order run of tokens.
///
/// An empty query, or a query longer than the document, never matches.
fn contains_token_phrase(document_terms: &[String], query_terms: &[String]) -> bool {
    let phrase_len = query_terms.len();
    let doc_len = document_terms.len();
    if phrase_len == 0 || phrase_len > doc_len {
        return false;
    }
    (0..=doc_len - phrase_len)
        .any(|start| &document_terms[start..start + phrase_len] == query_terms)
}
2206
2207fn score_node(
2208    context: &FindQueryContext<'_>,
2209    node: &Node,
2210    query: &str,
2211    pattern: &Pattern,
2212    matcher: &mut Matcher,
2213) -> Option<u32> {
2214    let mut primary_score = 0;
2215    let mut primary_hits = 0;
2216
2217    let id_score = score_primary_field(query, pattern, matcher, &node.id, 5);
2218    if id_score > 0 {
2219        primary_hits += 1;
2220    }
2221    primary_score += id_score;
2222
2223    let name_score = score_primary_field(query, pattern, matcher, &node.name, 4);
2224    if name_score > 0 {
2225        primary_hits += 1;
2226    }
2227    primary_score += name_score;
2228
2229    for alias in &node.properties.alias {
2230        let alias_score = score_primary_field(query, pattern, matcher, alias, 4);
2231        if alias_score > 0 {
2232            primary_hits += 1;
2233        }
2234        primary_score += alias_score;
2235    }
2236
2237    let mut contextual_score = score_secondary_field(
2238        query,
2239        pattern,
2240        matcher,
2241        &node.properties.description,
2242        FUZZY_DESCRIPTION_WEIGHT,
2243    );
2244    let mut facts_score = 0;
2245    for fact in &node.properties.key_facts {
2246        facts_score += score_secondary_field(query, pattern, matcher, fact, FUZZY_FACT_WEIGHT);
2247    }
2248    let facts_factor = fact_volume_normalizer(node);
2249    contextual_score += ((facts_score as f64) * facts_factor).round() as u32;
2250    contextual_score += score_notes_context(context, node, query, pattern, matcher);
2251
2252    let neighbor_context = score_neighbor_context(context, node, query, pattern, matcher)
2253        .min(FUZZY_NEIGHBOR_CONTEXT_CAP);
2254    contextual_score += neighbor_context / FUZZY_NEIGHBOR_CONTEXT_DIVISOR;
2255
2256    if primary_hits == 0 {
2257        contextual_score /= FUZZY_NO_PRIMARY_CONTEXT_DIVISOR;
2258    }
2259
2260    let total = primary_score + contextual_score;
2261    (total > 0).then_some(total)
2262}
2263
2264fn score_notes_context(
2265    context: &FindQueryContext<'_>,
2266    node: &Node,
2267    query: &str,
2268    pattern: &Pattern,
2269    matcher: &mut Matcher,
2270) -> u32 {
2271    let mut total = 0;
2272    for note in context.notes_for(&node.id) {
2273        total += score_secondary_field(query, pattern, matcher, &note.body, FUZZY_NOTE_BODY_WEIGHT);
2274        for tag in &note.tags {
2275            total += score_secondary_field(query, pattern, matcher, tag, FUZZY_NOTE_TAG_WEIGHT);
2276        }
2277    }
2278    total
2279}
2280
2281fn score_neighbor_context(
2282    context: &FindQueryContext<'_>,
2283    node: &Node,
2284    query: &str,
2285    pattern: &Pattern,
2286    matcher: &mut Matcher,
2287) -> u32 {
2288    let mut best = 0;
2289
2290    for neighbor in context.neighbors_for(&node.id) {
2291        let mut score = score_secondary_field(query, pattern, matcher, &neighbor.id, 1)
2292            + score_secondary_field(query, pattern, matcher, &neighbor.name, 1)
2293            + score_secondary_field(query, pattern, matcher, &neighbor.properties.description, 1);
2294
2295        for alias in &neighbor.properties.alias {
2296            score += score_secondary_field(query, pattern, matcher, alias, 1);
2297        }
2298
2299        best = best.max(score);
2300    }
2301
2302    best
2303}
2304
2305fn score_field(pattern: &Pattern, matcher: &mut Matcher, value: &str) -> Option<u32> {
2306    if value.is_empty() {
2307        return None;
2308    }
2309    let mut buf = Vec::new();
2310    let haystack = Utf32Str::new(value, &mut buf);
2311    pattern.score(haystack, matcher)
2312}
2313
2314fn score_primary_field(
2315    query: &str,
2316    pattern: &Pattern,
2317    matcher: &mut Matcher,
2318    value: &str,
2319    weight: u32,
2320) -> u32 {
2321    let bonus = textual_bonus(query, value);
2322    let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
2323    if bonus == 0 && fuzzy == 0 {
2324        return 0;
2325    }
2326    (fuzzy + bonus) * weight
2327}
2328
2329fn score_secondary_field(
2330    query: &str,
2331    pattern: &Pattern,
2332    matcher: &mut Matcher,
2333    value: &str,
2334    weight: u32,
2335) -> u32 {
2336    let bonus = textual_bonus(query, value);
2337    let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
2338    if bonus == 0 && fuzzy == 0 {
2339        return 0;
2340    }
2341    (fuzzy + bonus / 2) * weight
2342}
2343
/// Returns a case-insensitive textual bonus for how literally `query` appears
/// in `value`.
///
/// Tiers, checked in order:
///
/// * `400` – `value` equals the trimmed query.
/// * `200` – `value` contains the trimmed query as a substring.
/// * otherwise the sum over whitespace-separated query tokens of `80` when the
///   token is a substring of `value`, `40` when its characters occur in `value`
///   in order (a subsequence), and `0` when neither holds.
///
/// A blank (empty or whitespace-only) query always yields `0`.
fn textual_bonus(query: &str, value: &str) -> u32 {
    let query = query.trim().to_lowercase();
    // `str::contains("")` is true for every string, so without this guard a
    // blank query would hand the substring bonus to every field.
    if query.is_empty() {
        return 0;
    }
    let value = value.to_lowercase();

    if value == query {
        return 400;
    }
    if value.contains(&query) {
        return 200;
    }

    query
        .split_whitespace()
        .map(|token| {
            if value.contains(token) {
                80
            } else if is_subsequence(token, &value) {
                40
            } else {
                0
            }
        })
        .sum()
}

/// Reports whether the characters of `needle` appear in `haystack` in the same
/// order, not necessarily adjacent. An empty `needle` is never a subsequence.
fn is_subsequence(needle: &str, haystack: &str) -> bool {
    if needle.is_empty() {
        return false;
    }
    // `any` consumes the shared haystack iterator up to and including each
    // match, so successive needle characters must appear in order.
    let mut remaining = haystack.chars();
    needle
        .chars()
        .all(|wanted| remaining.any(|candidate| candidate == wanted))
}
2391
// Unit tests for the scoring helpers and for the end-to-end find pipeline.
#[cfg(test)]
mod tests {
    use super::*;

    // Builds a `Concept` node with the given text fields and ranking metadata.
    // Every other property keeps its default and `source_files` is empty.
    fn make_node(
        id: &str,
        name: &str,
        description: &str,
        key_facts: &[&str],
        alias: &[&str],
        importance: f64,
        feedback_score: f64,
        feedback_count: u64,
    ) -> Node {
        let mut properties = crate::graph::NodeProperties::default();
        properties.description = description.to_owned();
        properties.key_facts = key_facts.iter().map(|v| (*v).to_owned()).collect();
        properties.alias = alias.iter().map(|v| (*v).to_owned()).collect();
        properties.importance = importance;
        properties.feedback_score = feedback_score;
        properties.feedback_count = feedback_count;
        Node {
            id: id.to_owned(),
            r#type: "Concept".to_owned(),
            name: name.to_owned(),
            properties,
            source_files: Vec::new(),
        }
    }

    // Builds an edge `source_id --relation--> target_id` with default properties.
    fn make_edge(source_id: &str, relation: &str, target_id: &str) -> Edge {
        Edge {
            source_id: source_id.to_owned(),
            relation: relation.to_owned(),
            target_id: target_id.to_owned(),
            properties: crate::graph::EdgeProperties::default(),
        }
    }

    // Returns the final score of the result whose node id equals `id`.
    // Panics when the node is missing from `results`.
    fn score_for(results: &[ScoredNode<'_>], id: &str) -> i64 {
        results
            .iter()
            .find(|item| item.node.id == id)
            .map(|item| item.score)
            .expect("score for node")
    }

    // Pins the bonus tiers: exact match, substring, per-token substring (80 each),
    // subsequence, and no match.
    #[test]
    fn textual_bonus_tiers_are_stable() {
        assert_eq!(textual_bonus("abc", "abc"), 400);
        assert_eq!(textual_bonus("abc", "xxabcxx"), 200);
        assert_eq!(textual_bonus("abc def", "aa abc and def zz"), 160);
        assert_eq!(textual_bonus("abc", "aXbYc"), 40);
        assert_eq!(textual_bonus("abc", "zzz"), 0);
    }

    // Non-ASCII upper-case letters are lower-cased and the hyphen splits tokens.
    #[test]
    fn tokenize_handles_unicode_casefolding() {
        let tokens = tokenize("ŁÓDŹ smart-home");
        assert_eq!(tokens, vec!["łódź", "smart", "home"]);
    }

    // A contiguous phrase earns the phrase boost (120); the same tokens out of
    // order earn only the token boost (45); unrelated text earns nothing.
    #[test]
    fn bm25_lexical_boost_prefers_phrase_then_tokens() {
        let query_terms = tokenize("smart home api");
        assert_eq!(
            bm25_lexical_boost_with_idf(&query_terms, &tokenize("x smart home api y"), |_| 1.0),
            120
        );
        assert_eq!(
            bm25_lexical_boost_with_idf(&query_terms, &tokenize("smart x api y home"), |_| 1.0),
            45
        );
        assert_eq!(
            bm25_lexical_boost_with_idf(&query_terms, &tokenize("nothing here"), |_| 1.0),
            0
        );
    }

    // The query only appears in a key fact and in a note, never in id/name/alias,
    // yet the node must still receive a positive fuzzy score.
    #[test]
    fn score_node_uses_key_facts_and_notes_without_primary_match() {
        let node = make_node(
            "concept:gateway",
            "Gateway",
            "",
            &["Autentykacja OAuth2 przez konto producenta"],
            &[],
            0.5,
            0.0,
            0,
        );
        let mut graph = GraphFile::new("test");
        graph.nodes.push(node.clone());
        graph.notes.push(crate::graph::Note {
            id: "note:oauth".to_owned(),
            node_id: node.id.clone(),
            body: "Token refresh przez OAuth2".to_owned(),
            tags: vec!["oauth2".to_owned()],
            ..Default::default()
        });

        let pattern = Pattern::parse(
            "oauth2 producenta",
            CaseMatching::Ignore,
            Normalization::Smart,
        );
        let context = FindQueryContext::build(&graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let score = score_node(&context, &node, "oauth2 producenta", &pattern, &mut matcher);
        assert!(score.is_some_and(|value| value > 0));

        // Control case: the same node without facts or notes must not match at all.
        let empty_graph = GraphFile::new("empty");
        let empty_node = make_node("concept:gateway", "Gateway", "", &[], &[], 0.5, 0.0, 0);
        let empty_context = FindQueryContext::build(&empty_graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let empty_score = score_node(
            &empty_context,
            &empty_node,
            "oauth2 producenta",
            &pattern,
            &mut matcher,
        );
        assert!(empty_score.is_none());
    }

    // Two nodes with the same description differ only in importance (1.0 vs 0.0);
    // the more important one must rank higher in BM25 mode.
    #[test]
    fn score_bm25_respects_importance_boost_for_equal_documents() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:high",
            "High",
            "smart home api",
            &[],
            &[],
            1.0,
            0.0,
            0,
        ));
        graph.nodes.push(make_node(
            "concept:low",
            "Low",
            "smart home api",
            &[],
            &[],
            0.0,
            0.0,
            0,
        ));

        let results = find_all_matches_with_index(
            &graph,
            "smart home api",
            true,
            false,
            FindMode::Bm25,
            None,
            None,
        );
        let high_score = score_for(&results, "concept:high");
        let low_score = score_for(&results, "concept:low");
        assert!(high_score > low_score);
    }

    // `concept:hub` matches only through its neighbor `concept:neighbor_hit`. It
    // must still be returned, but below the node that matches on its own text.
    #[test]
    fn bm25_prefers_self_match_over_neighbor_only_match() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:self_hit",
            "Batch plugin output directory",
            "",
            &["BatchPlugin OUTPUT_DIR rule in WebLogic path"],
            &[],
            0.5,
            0.0,
            0,
        ));
        graph.nodes.push(make_node(
            "concept:hub",
            "Integration Hub",
            "gateway for many systems",
            &[],
            &[],
            0.5,
            0.0,
            0,
        ));
        graph.nodes.push(make_node(
            "concept:neighbor_hit",
            "BatchPlugin OUTPUT_DIR in WebLogic",
            "",
            &[],
            &[],
            0.5,
            0.0,
            0,
        ));
        graph
            .edges
            .push(make_edge("concept:hub", "HAS", "concept:neighbor_hit"));

        let results = find_all_matches_with_index(
            &graph,
            "BatchPlugin OUTPUT_DIR WebLogic",
            true,
            false,
            FindMode::Bm25,
            None,
            None,
        );

        assert!(results.iter().any(|item| item.node.id == "concept:hub"));
        assert!(score_for(&results, "concept:self_hit") > score_for(&results, "concept:hub"));
    }

    // The query-relevant neighbor has the lowest importance and its edge is added
    // last, yet its edge line must be rendered first.
    #[test]
    fn link_rendering_sorts_incident_edges_by_query_relevance() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:center",
            "Center",
            "",
            &[],
            &[],
            0.5,
            0.0,
            0,
        ));
        graph.nodes.push(make_node(
            "concept:relevant",
            "Push notification template",
            "",
            &[],
            &[],
            0.2,
            0.0,
            0,
        ));
        graph.nodes.push(make_node(
            "concept:irrelevant_a",
            "Billing ledger",
            "",
            &[],
            &[],
            0.9,
            0.0,
            0,
        ));
        graph.nodes.push(make_node(
            "concept:irrelevant_b",
            "Audit trail",
            "",
            &[],
            &[],
            0.8,
            0.0,
            0,
        ));
        graph
            .edges
            .push(make_edge("concept:center", "HAS", "concept:irrelevant_a"));
        graph
            .edges
            .push(make_edge("concept:center", "HAS", "concept:irrelevant_b"));
        graph
            .edges
            .push(make_edge("concept:center", "HAS", "concept:relevant"));

        let center = graph.node_by_id("concept:center").expect("center node");
        let lines = render_node_link_lines(&graph, center, 2, Some("push notification template"));

        let first_edge = lines
            .iter()
            .find(|line| line.starts_with("-> "))
            .expect("first edge line");
        assert!(first_edge.contains("concept:relevant"));
    }

    // A weakly relevant node with maximal importance and a large feedback score
    // must have its authority boost clamped to the cap.
    #[test]
    fn final_score_caps_authority_boost_for_weak_relevance() {
        let weak = make_node(
            "concept:weak",
            "Weak",
            "smart home api",
            &[],
            &[],
            1.0,
            300.0,
            1,
        );
        let strong = make_node(
            "concept:strong",
            "Strong",
            "smart home api smart home api smart home api smart home api",
            &[],
            &[],
            0.5,
            0.0,
            0,
        );
        let candidates = vec![
            RawCandidate {
                node: &weak,
                raw_relevance: 12.0,
                lexical_boost: 0,
            },
            RawCandidate {
                node: &strong,
                raw_relevance: 100.0,
                lexical_boost: 0,
            },
        ];
        let scored = compose_scores(candidates);
        let weak_scored = scored
            .iter()
            .find(|item| item.node.id == "concept:weak")
            .expect("weak node");
        assert_eq!(
            weak_scored.breakdown.authority_applied,
            weak_scored.breakdown.authority_cap
        );
        assert!(weak_scored.breakdown.authority_raw > weak_scored.breakdown.authority_cap);
    }

    // Pins the numeric range of the importance boost (+/-66 at the extremes) and
    // three sample points of the feedback boost.
    #[test]
    fn importance_and_feedback_boost_have_expected_ranges() {
        let high_importance = make_node("concept:high", "High", "", &[], &[], 1.0, 0.0, 0);
        let low_importance = make_node("concept:low", "Low", "", &[], &[], 0.0, 0.0, 0);
        assert_eq!(importance_boost(&high_importance), 66);
        assert_eq!(importance_boost(&low_importance), -66);

        let positive = make_node("concept:pos", "Pos", "", &[], &[], 0.5, 1.0, 1);
        let negative = make_node("concept:neg", "Neg", "", &[], &[], 0.5, -2.0, 1);
        let saturated = make_node("concept:sat", "Sat", "", &[], &[], 0.5, 300.0, 1);
        assert_eq!(feedback_boost(&positive), 46);
        assert_eq!(feedback_boost(&negative), -92);
        assert_eq!(feedback_boost(&saturated), 300);
    }

    // Two node records share the id `concept:rule`; the result list must contain
    // that id only once.
    #[test]
    fn find_deduplicates_results_by_node_id_for_single_query() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:rule",
            "Business Rule",
            "Rule for billing decisions",
            &["Business rule validation"],
            &["billing rule"],
            0.5,
            0.0,
            0,
        ));
        graph.nodes.push(make_node(
            "concept:rule",
            "Business Rule Duplicate",
            "Duplicate record with same id",
            &["Business rule duplicate"],
            &[],
            0.5,
            0.0,
            0,
        ));

        let results = find_all_matches_with_index(
            &graph,
            "business rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            None,
        );
        let rule_hits = results
            .iter()
            .filter(|item| item.node.id == "concept:rule")
            .count();
        assert_eq!(rule_hits, 1);
    }

    // Changing only `FindTune::vector` must leave the hybrid score unchanged.
    #[test]
    fn hybrid_score_does_not_change_when_only_vector_weight_changes() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:auth",
            "Authentication Rule",
            "Business rule for authentication",
            &["auth rule"],
            &["login policy"],
            0.5,
            0.0,
            0,
        ));

        let with_vector = find_all_matches_with_index(
            &graph,
            "authentication rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            Some(&FindTune {
                bm25: 0.55,
                fuzzy: 0.35,
                vector: 1.0,
            }),
        );
        let no_vector = find_all_matches_with_index(
            &graph,
            "authentication rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            Some(&FindTune {
                bm25: 0.55,
                fuzzy: 0.35,
                vector: 0.0,
            }),
        );

        assert_eq!(with_vector.len(), 1);
        assert_eq!(no_vector.len(), 1);
        assert_eq!(with_vector[0].score, no_vector[0].score);
    }

    // Nodes of type `^` are hidden when the metadata flag is off and returned
    // when it is on.
    #[test]
    fn find_hides_metadata_nodes_unless_enabled() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "^:graph_info",
            "Graph Metadata",
            "Internal metadata",
            &["graph_uuid=abc123"],
            &[],
            0.5,
            0.0,
            0,
        ));
        // `make_node` always creates `Concept` nodes, so the type is patched here.
        if let Some(meta) = graph
            .nodes
            .iter_mut()
            .find(|node| node.id == "^:graph_info")
        {
            meta.r#type = "^".to_owned();
        }

        let hidden = find_all_matches_with_index(
            &graph,
            "graph uuid",
            true,
            false,
            FindMode::Hybrid,
            None,
            None,
        );
        assert!(hidden.is_empty());

        let shown = find_all_matches_with_index(
            &graph,
            "graph uuid",
            true,
            true,
            FindMode::Hybrid,
            None,
            None,
        );
        assert_eq!(shown.len(), 1);
        assert_eq!(shown[0].node.id, "^:graph_info");
    }
}
kg/output.rs

kg/
output.rs