kg/
output.rs

1use std::collections::{HashMap, HashSet, VecDeque};
2
3use nucleo_matcher::pattern::{CaseMatching, Normalization, Pattern};
4use nucleo_matcher::{Config, Matcher, Utf32Str};
5
6use crate::graph::{Edge, GraphFile, Node, Note};
7use crate::index::Bm25Index;
8use crate::text_norm;
9
// BM25 tuning values.
// NOTE(review): presumably the conventional k1 (term-frequency saturation) and b
// (length normalisation) parameters — confirm at the scoring call site.
const BM25_K1: f64 = 1.5;
const BM25_B: f64 = 0.75;
// Character budget for adaptive rendering: `clamp_target_chars` falls back to the
// default when no target is given and clamps every target into `MIN..=MAX`.
const DEFAULT_TARGET_CHARS: usize = 4200;
const MIN_TARGET_CHARS: usize = 300;
const MAX_TARGET_CHARS: usize = 12_000;
// Fuzzy-matching caps, divisors and per-field weights.
// NOTE(review): exact semantics live in the fuzzy scorer — verify there.
const FUZZY_NEIGHBOR_CONTEXT_CAP: u32 = 220;
const FUZZY_NO_PRIMARY_CONTEXT_DIVISOR: u32 = 3;
const FUZZY_NEIGHBOR_CONTEXT_DIVISOR: u32 = 3;
const FUZZY_DESCRIPTION_WEIGHT: u32 = 2;
const FUZZY_FACT_WEIGHT: u32 = 2;
const FUZZY_NOTE_BODY_WEIGHT: u32 = 1;
const FUZZY_NOTE_TAG_WEIGHT: u32 = 2;
// Additive lexical boosts for BM25 hits.
// NOTE(review): presumably phrase > proximity > token match — confirm.
const BM25_PHRASE_MATCH_BOOST: i64 = 120;
const BM25_PROXIMITY_MATCH_BOOST: i64 = 80;
const BM25_TOKEN_MATCH_BOOST: i64 = 45;
// Per-field BM25 weights.
// NOTE(review): presumably term repetition factors per field — confirm.
const BM25_ID_WEIGHT: usize = 5;
const BM25_NAME_WEIGHT: usize = 4;
const BM25_ALIAS_WEIGHT: usize = 4;
const BM25_DESCRIPTION_WEIGHT: usize = 2;
const BM25_FACT_WEIGHT: usize = 2;
const BM25_NOTE_BODY_WEIGHT: usize = 1;
const BM25_NOTE_TAG_WEIGHT: usize = 1;
const BM25_NEIGHBOR_WEIGHT: usize = 1;
const BM25_SELF_CONTEXT_WEIGHT: f64 = 3.0;
const BM25_NEIGHBOR_CONTEXT_WEIGHT: f64 = 1.0;
const BM25_PROXIMITY_WINDOW_TOKENS: usize = 6;
// Fact-volume, importance and score-metadata limits.
// NOTE(review): consumers are not shown next to these definitions — verify units there.
const FACT_VOLUME_BASE_CHARS: f64 = 500.0;
const FACT_VOLUME_MIN_FACTOR: f64 = 0.35;
const IMPORTANCE_NEUTRAL: f64 = 0.5;
const IMPORTANCE_MAX_ABS_BOOST: f64 = 66.0;
const SCORE_META_MAX_RATIO: f64 = 0.35;
const SCORE_META_MIN_CAP: i64 = 30;
const SCORE_META_MAX_CAP: i64 = 240;
43
/// Search strategy selector that the `find_*` and `render_find*` entry points forward
/// unchanged to the matching routines.
///
/// NOTE(review): variant semantics are implemented by the matcher — presumably
/// fuzzy-only, BM25-only, and a blend of both; confirm there.
#[derive(Debug, Clone, Copy)]
pub enum FindMode {
    Fuzzy,
    Bm25,
    Hybrid,
}
50
/// Per-signal weights for find scoring.
///
/// [`FindTune::parse`] clamps each weight into `0.0..=1.0`; the [`Default`] values are
/// `bm25 = 0.55`, `fuzzy = 0.35`, `vector = 0.10`.
#[derive(Debug, Clone, Copy)]
pub struct FindTune {
    /// Weight of the `bm25` key.
    pub bm25: f64,
    /// Weight of the `fuzzy` key.
    pub fuzzy: f64,
    /// Weight of the `vector` key.
    pub vector: f64,
}
57
58impl FindTune {
59    pub fn parse(raw: &str) -> Option<Self> {
60        let mut tune = Self::default();
61        for part in raw.split(',') {
62            let (key, value) = part.split_once('=')?;
63            let value = value.trim().parse::<f64>().ok()?;
64            match key.trim() {
65                "bm25" => tune.bm25 = value,
66                "fuzzy" => tune.fuzzy = value,
67                "vector" => tune.vector = value,
68                _ => return None,
69            }
70        }
71        Some(tune.clamped())
72    }
73
74    fn clamped(self) -> Self {
75        Self {
76            bm25: self.bm25.clamp(0.0, 1.0),
77            fuzzy: self.fuzzy.clamp(0.0, 1.0),
78            vector: self.vector.clamp(0.0, 1.0),
79        }
80    }
81}
82
83impl Default for FindTune {
84    fn default() -> Self {
85        Self {
86            bm25: 0.55,
87            fuzzy: 0.35,
88            vector: 0.10,
89        }
90    }
91}
92
/// A borrowed node paired with its final score and the components behind it.
#[derive(Clone, Copy)]
struct ScoredNode<'a> {
    /// Final score; printed as `score: …` by the renderers.
    score: i64,
    /// The matched node, borrowed from the graph.
    node: &'a Node,
    /// Component values; printed by `render_score_debug_line`.
    breakdown: ScoreBreakdown,
}
99
/// Individual score components of one match.
///
/// `render_score_debug_line` prints every field verbatim, and the public
/// [`ScoreBreakdownResult`] mirrors them field for field.
/// NOTE(review): how the components combine into the final score is decided by the
/// scorer — confirm there.
#[derive(Debug, Clone, Copy)]
struct ScoreBreakdown {
    raw_relevance: f64,
    normalized_relevance: i64,
    lexical_boost: i64,
    feedback_boost: i64,
    importance_boost: i64,
    authority_raw: i64,
    authority_applied: i64,
    authority_cap: i64,
}
111
/// A node with its un-normalised relevance and lexical boost.
/// NOTE(review): presumably an intermediate value produced before score
/// normalisation — confirm in the scorer.
struct RawCandidate<'a> {
    node: &'a Node,
    raw_relevance: f64,
    lexical_boost: i64,
}
117
/// Lookup tables built once per graph by `FindQueryContext::build`.
struct FindQueryContext<'a> {
    /// Notes grouped by the id of the node they reference (`note.node_id`).
    notes_by_node: HashMap<&'a str, Vec<&'a Note>>,
    /// Nodes connected by an edge in either direction, keyed by node id;
    /// each list is sorted by id and de-duplicated.
    neighbors_by_node: HashMap<&'a str, Vec<&'a Node>>,
}
122
123impl<'a> FindQueryContext<'a> {
124    fn build(graph: &'a GraphFile) -> Self {
125        let node_by_id: HashMap<&'a str, &'a Node> = graph
126            .nodes
127            .iter()
128            .map(|node| (node.id.as_str(), node))
129            .collect();
130
131        let mut notes_by_node: HashMap<&'a str, Vec<&'a Note>> = HashMap::new();
132        for note in &graph.notes {
133            notes_by_node
134                .entry(note.node_id.as_str())
135                .or_default()
136                .push(note);
137        }
138
139        let mut neighbors_by_node: HashMap<&'a str, Vec<&'a Node>> = HashMap::new();
140        for edge in &graph.edges {
141            if let (Some(source), Some(target)) = (
142                node_by_id.get(edge.source_id.as_str()),
143                node_by_id.get(edge.target_id.as_str()),
144            ) {
145                neighbors_by_node
146                    .entry(source.id.as_str())
147                    .or_default()
148                    .push(*target);
149                neighbors_by_node
150                    .entry(target.id.as_str())
151                    .or_default()
152                    .push(*source);
153            }
154        }
155
156        for neighbors in neighbors_by_node.values_mut() {
157            neighbors.sort_by(|left, right| left.id.cmp(&right.id));
158            neighbors.dedup_by(|left, right| left.id == right.id);
159        }
160
161        Self {
162            notes_by_node,
163            neighbors_by_node,
164        }
165    }
166
167    fn notes_for(&self, node_id: &str) -> &[&'a Note] {
168        self.notes_by_node
169            .get(node_id)
170            .map(Vec::as_slice)
171            .unwrap_or(&[])
172    }
173
174    fn neighbors_for(&self, node_id: &str) -> &[&'a Node] {
175        self.neighbors_by_node
176            .get(node_id)
177            .map(Vec::as_slice)
178            .unwrap_or(&[])
179    }
180}
181
/// Owned, public copy of the score components of one match.
///
/// `find_scored_nodes_and_total_with_index_tuned` fills each field from the
/// same-named field of the internal breakdown.
#[derive(Debug, Clone)]
pub struct ScoreBreakdownResult {
    pub raw_relevance: f64,
    pub normalized_relevance: i64,
    pub lexical_boost: i64,
    pub feedback_boost: i64,
    pub importance_boost: i64,
    pub authority_raw: i64,
    pub authority_applied: i64,
    pub authority_cap: i64,
}
193
/// Owned search hit: a cloned node with its final score and score components.
#[derive(Debug, Clone)]
pub struct ScoredNodeResult {
    /// Final score of the match.
    pub score: i64,
    /// Clone of the matched node.
    pub node: Node,
    /// Components that produced `score`.
    pub breakdown: ScoreBreakdownResult,
}
200
/// Renders find results for every query without a prebuilt index.
///
/// Delegates to [`render_find_with_index`] with score debugging disabled (`false`)
/// and no index (`None`).
pub fn render_find(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    full: bool,
) -> String {
    render_find_with_index(
        graph,
        queries,
        limit,
        include_features,
        include_metadata,
        mode,
        full,
        false,
        None,
    )
}
222
/// Renders find results for every query, optionally using a BM25 index.
///
/// Delegates to [`render_find_with_index_tuned`] with no tuning override (`None`).
pub fn render_find_with_index(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    full: bool,
    debug_score: bool,
    index: Option<&Bm25Index>,
) -> String {
    render_find_with_index_tuned(
        graph,
        queries,
        limit,
        include_features,
        include_metadata,
        mode,
        full,
        debug_score,
        index,
        None,
    )
}
247
248pub fn render_find_with_index_tuned(
249    graph: &GraphFile,
250    queries: &[String],
251    limit: usize,
252    include_features: bool,
253    include_metadata: bool,
254    mode: FindMode,
255    full: bool,
256    debug_score: bool,
257    index: Option<&Bm25Index>,
258    tune: Option<&FindTune>,
259) -> String {
260    let mut sections = Vec::new();
261    for query in queries {
262        let matches = find_all_matches_with_index(
263            graph,
264            query,
265            include_features,
266            include_metadata,
267            mode,
268            index,
269            tune,
270        );
271        let total = matches.len();
272        let visible: Vec<_> = matches.into_iter().take(limit).collect();
273        let shown = visible.len();
274        let mut lines = vec![render_result_header(query, shown, total)];
275        for scored in visible {
276            lines.push(render_scored_node_block(
277                graph,
278                &scored,
279                full,
280                debug_score,
281                Some(query.as_str()),
282            ));
283        }
284        push_limit_omission_line(&mut lines, shown, total);
285        sections.push(lines.join("\n"));
286    }
287    format!("{}\n", sections.join("\n\n"))
288}
289
/// Returns clones of the nodes matched by `query`.
///
/// Calls `find_matches_with_index` with `limit`, no index and no tuning override
/// (both `None`), and keeps the resulting order.
pub fn find_nodes(
    graph: &GraphFile,
    query: &str,
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
) -> Vec<Node> {
    find_matches_with_index(
        graph,
        query,
        limit,
        include_features,
        include_metadata,
        mode,
        None,
        None,
    )
    .into_iter()
    .map(|item| item.node.clone())
    .collect()
}
312
/// Returns clones of the nodes matched by `query`, optionally using a BM25 index.
///
/// Calls `find_matches_with_index` with `limit`, the supplied `index` and no tuning
/// override (`None`), and keeps the resulting order.
pub fn find_nodes_with_index(
    graph: &GraphFile,
    query: &str,
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    index: Option<&Bm25Index>,
) -> Vec<Node> {
    find_matches_with_index(
        graph,
        query,
        limit,
        include_features,
        include_metadata,
        mode,
        index,
        None,
    )
    .into_iter()
    .map(|item| item.node.clone())
    .collect()
}
336
/// Returns clones of the nodes matched by `query`, with optional index and tuning.
///
/// Forwards every argument to `find_matches_with_index` and keeps the resulting order.
pub fn find_nodes_with_index_tuned(
    graph: &GraphFile,
    query: &str,
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    index: Option<&Bm25Index>,
    tune: Option<&FindTune>,
) -> Vec<Node> {
    find_matches_with_index(
        graph,
        query,
        limit,
        include_features,
        include_metadata,
        mode,
        index,
        tune,
    )
    .into_iter()
    .map(|item| item.node.clone())
    .collect()
}
361
362pub fn find_nodes_and_total_with_index(
363    graph: &GraphFile,
364    query: &str,
365    limit: usize,
366    include_features: bool,
367    include_metadata: bool,
368    mode: FindMode,
369    index: Option<&Bm25Index>,
370) -> (usize, Vec<Node>) {
371    let matches = find_all_matches_with_index(
372        graph,
373        query,
374        include_features,
375        include_metadata,
376        mode,
377        index,
378        None,
379    );
380    let total = matches.len();
381    let nodes = matches
382        .into_iter()
383        .take(limit)
384        .map(|item| item.node.clone())
385        .collect();
386    (total, nodes)
387}
388
/// Returns the total match count and the first `limit` scored results for `query`.
///
/// Delegates to [`find_scored_nodes_and_total_with_index_tuned`] with no tuning
/// override (`None`).
pub fn find_scored_nodes_and_total_with_index(
    graph: &GraphFile,
    query: &str,
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    index: Option<&Bm25Index>,
) -> (usize, Vec<ScoredNodeResult>) {
    find_scored_nodes_and_total_with_index_tuned(
        graph,
        query,
        limit,
        include_features,
        include_metadata,
        mode,
        index,
        None,
    )
}
409
410pub fn find_scored_nodes_and_total_with_index_tuned(
411    graph: &GraphFile,
412    query: &str,
413    limit: usize,
414    include_features: bool,
415    include_metadata: bool,
416    mode: FindMode,
417    index: Option<&Bm25Index>,
418    tune: Option<&FindTune>,
419) -> (usize, Vec<ScoredNodeResult>) {
420    let matches = find_all_matches_with_index(
421        graph,
422        query,
423        include_features,
424        include_metadata,
425        mode,
426        index,
427        tune,
428    );
429    let total = matches.len();
430    let nodes = matches
431        .into_iter()
432        .take(limit)
433        .map(|item| ScoredNodeResult {
434            score: item.score,
435            node: item.node.clone(),
436            breakdown: ScoreBreakdownResult {
437                raw_relevance: item.breakdown.raw_relevance,
438                normalized_relevance: item.breakdown.normalized_relevance,
439                lexical_boost: item.breakdown.lexical_boost,
440                feedback_boost: item.breakdown.feedback_boost,
441                importance_boost: item.breakdown.importance_boost,
442                authority_raw: item.breakdown.authority_raw,
443                authority_applied: item.breakdown.authority_applied,
444                authority_cap: item.breakdown.authority_cap,
445            },
446        })
447        .collect();
448    (total, nodes)
449}
450
/// Counts all matches across `queries` without a prebuilt index.
///
/// Delegates to [`count_find_results_with_index`] with `index = None`. `limit` is
/// passed through, but that function does not use it.
pub fn count_find_results(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
) -> usize {
    count_find_results_with_index(
        graph,
        queries,
        limit,
        include_features,
        include_metadata,
        mode,
        None,
    )
}
469
470pub fn count_find_results_with_index(
471    graph: &GraphFile,
472    queries: &[String],
473    _limit: usize,
474    include_features: bool,
475    include_metadata: bool,
476    mode: FindMode,
477    index: Option<&Bm25Index>,
478) -> usize {
479    let mut total = 0;
480    for query in queries {
481        total += find_all_matches_with_index(
482            graph,
483            query,
484            include_features,
485            include_metadata,
486            mode,
487            index,
488            None,
489        )
490        .len();
491    }
492    total
493}
494
/// Renders a single node block via `render_node_block` and appends a trailing newline.
pub fn render_node(graph: &GraphFile, node: &Node, full: bool) -> String {
    format!("{}\n", render_node_block(graph, node, full))
}
498
499pub fn render_node_adaptive(graph: &GraphFile, node: &Node, target_chars: Option<usize>) -> String {
500    let target = clamp_target_chars(target_chars);
501    let full = format!("{}\n", render_node_block(graph, node, true));
502    if fits_target_chars(&full, target) {
503        return full;
504    }
505    let mut candidates = Vec::new();
506    for (depth, detail, edge_cap) in [
507        (0usize, DetailLevel::Rich, 8usize),
508        (1usize, DetailLevel::Rich, 8usize),
509        (2usize, DetailLevel::Rich, 6usize),
510        (2usize, DetailLevel::Compact, 6usize),
511        (2usize, DetailLevel::Minimal, 2usize),
512    ] {
513        let rendered = render_single_node_candidate(graph, node, depth, detail, edge_cap);
514        candidates.push(Candidate {
515            rendered,
516            depth,
517            detail,
518            shown_nodes: 1 + depth,
519        });
520    }
521    pick_best_candidate(candidates, target)
522}
523
/// Renders find results sized to a character budget, optionally using a BM25 index.
///
/// Delegates to [`render_find_adaptive_with_index_tuned`] with no tuning override
/// (`None`).
pub fn render_find_adaptive_with_index(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    include_metadata: bool,
    mode: FindMode,
    target_chars: Option<usize>,
    debug_score: bool,
    index: Option<&Bm25Index>,
) -> String {
    render_find_adaptive_with_index_tuned(
        graph,
        queries,
        limit,
        include_features,
        include_metadata,
        mode,
        target_chars,
        debug_score,
        index,
        None,
    )
}
548
549pub fn render_find_adaptive_with_index_tuned(
550    graph: &GraphFile,
551    queries: &[String],
552    limit: usize,
553    include_features: bool,
554    include_metadata: bool,
555    mode: FindMode,
556    target_chars: Option<usize>,
557    debug_score: bool,
558    index: Option<&Bm25Index>,
559    tune: Option<&FindTune>,
560) -> String {
561    let target = clamp_target_chars(target_chars);
562    let mut sections = Vec::new();
563    for query in queries {
564        let matches = find_all_matches_with_index(
565            graph,
566            query,
567            include_features,
568            include_metadata,
569            mode,
570            index,
571            tune,
572        );
573        let total = matches.len();
574        let visible: Vec<_> = matches.into_iter().take(limit).collect();
575        let section = if visible.len() == 1 {
576            render_single_result_section(graph, query, &visible[0], total, target, debug_score)
577        } else {
578            render_multi_result_section(graph, query, &visible, total, target, debug_score)
579        };
580        sections.push(section);
581    }
582    format!("{}\n", sections.join("\n\n"))
583}
584
/// How much of a node is printed by `render_node_identity_lines`.
#[derive(Clone, Copy)]
enum DetailLevel {
    /// Header, aliases, description and up to three key facts.
    Rich,
    /// Header, description (rendered with a limit of 140) and the first key fact.
    Compact,
    /// Header line only.
    Minimal,
}
591
/// One rendered variant considered by `pick_best_candidate`.
struct Candidate {
    /// The rendered text; its character count is measured against the target.
    rendered: String,
    /// Neighbour depth used for this variant; contributes 100 per level to utility.
    depth: usize,
    /// Detail level used; contributes `utility_bonus()` to utility.
    detail: DetailLevel,
    /// Count supplied by the caller; contributes 5 per unit to utility.
    shown_nodes: usize,
}
598
599impl DetailLevel {
600    fn utility_bonus(self) -> usize {
601        match self {
602            DetailLevel::Rich => 20,
603            DetailLevel::Compact => 10,
604            DetailLevel::Minimal => 0,
605        }
606    }
607}
608
609fn clamp_target_chars(target_chars: Option<usize>) -> usize {
610    target_chars
611        .unwrap_or(DEFAULT_TARGET_CHARS)
612        .clamp(MIN_TARGET_CHARS, MAX_TARGET_CHARS)
613}
614
615fn render_single_result_section(
616    graph: &GraphFile,
617    query: &str,
618    node: &ScoredNode<'_>,
619    total_available: usize,
620    target: usize,
621    debug_score: bool,
622) -> String {
623    let header = render_result_header(query, 1, total_available);
624    let full = render_single_result_candidate(
625        graph,
626        query,
627        &header,
628        node,
629        total_available,
630        0,
631        DetailLevel::Rich,
632        8,
633        true,
634        debug_score,
635    );
636    if fits_target_chars(&full, target) {
637        return full.trim_end().to_owned();
638    }
639    let mut candidates = Vec::new();
640    for (depth, detail, edge_cap) in [
641        (0usize, DetailLevel::Rich, 8usize),
642        (1usize, DetailLevel::Rich, 8usize),
643        (2usize, DetailLevel::Rich, 6usize),
644        (2usize, DetailLevel::Compact, 6usize),
645        (2usize, DetailLevel::Minimal, 2usize),
646    ] {
647        candidates.push(Candidate {
648            rendered: render_single_result_candidate(
649                graph,
650                query,
651                &header,
652                node,
653                total_available,
654                depth,
655                detail,
656                edge_cap,
657                false,
658                debug_score,
659            ),
660            depth,
661            detail,
662            shown_nodes: 1 + depth,
663        });
664    }
665    pick_best_candidate(candidates, target)
666        .trim_end()
667        .to_owned()
668}
669
670fn render_multi_result_section(
671    graph: &GraphFile,
672    query: &str,
673    nodes: &[ScoredNode<'_>],
674    total_available: usize,
675    target: usize,
676    debug_score: bool,
677) -> String {
678    let visible_total = nodes.len();
679    let full = render_full_result_section(graph, query, nodes, total_available, debug_score);
680    if fits_target_chars(&full, target) {
681        return full;
682    }
683    let mut candidates = Vec::new();
684    let full_cap = visible_total;
685    let mid_cap = full_cap.min(5);
686    let low_cap = full_cap.min(3);
687
688    for (detail, edge_cap, result_cap, depth) in [
689        (DetailLevel::Rich, 4usize, full_cap.min(4), 0usize),
690        (DetailLevel::Compact, 3usize, full_cap, 0usize),
691        (DetailLevel::Rich, 2usize, mid_cap, 1usize),
692        (DetailLevel::Compact, 1usize, full_cap, 0usize),
693        (DetailLevel::Minimal, 1usize, mid_cap, 0usize),
694        (DetailLevel::Minimal, 0usize, low_cap, 0usize),
695        (DetailLevel::Minimal, 0usize, low_cap.min(2), 1usize),
696    ] {
697        let shown = result_cap.min(nodes.len());
698        let mut lines = vec![render_result_header(query, shown, total_available)];
699        for node in nodes.iter().take(shown) {
700            lines.extend(render_scored_node_candidate_lines(
701                graph,
702                query,
703                node,
704                0,
705                detail,
706                edge_cap,
707                debug_score,
708            ));
709            if depth > 0 {
710                lines.extend(render_neighbor_layers(graph, node.node, depth, detail));
711            }
712        }
713        if visible_total > shown {
714            lines.push(format!("... +{} more nodes omitted", visible_total - shown));
715        }
716        push_limit_omission_line(&mut lines, visible_total, total_available);
717        candidates.push(Candidate {
718            rendered: format!("{}\n", lines.join("\n")),
719            depth,
720            detail,
721            shown_nodes: shown,
722        });
723    }
724
725    pick_best_candidate(candidates, target)
726        .trim_end()
727        .to_owned()
728}
729
730fn pick_best_candidate(candidates: Vec<Candidate>, target: usize) -> String {
731    let lower = (target as f64 * 0.7) as usize;
732    let mut best: Option<(usize, usize, usize, usize, String)> = None;
733
734    for candidate in candidates {
735        let chars = candidate.rendered.chars().count();
736        let overshoot = chars.saturating_sub(target);
737        let undershoot = lower.saturating_sub(chars);
738        let penalty = overshoot.saturating_mul(10).saturating_add(undershoot);
739        let utility = candidate
740            .depth
741            .saturating_mul(100)
742            .saturating_add(candidate.shown_nodes.saturating_mul(5))
743            .saturating_add(candidate.detail.utility_bonus());
744
745        let entry = (
746            penalty,
747            overshoot,
748            usize::MAX - utility,
749            usize::MAX - chars,
750            candidate.rendered,
751        );
752        if best.as_ref().is_none_or(|current| {
753            entry.0 < current.0
754                || (entry.0 == current.0 && entry.1 < current.1)
755                || (entry.0 == current.0 && entry.1 == current.1 && entry.2 < current.2)
756                || (entry.0 == current.0
757                    && entry.1 == current.1
758                    && entry.2 == current.2
759                    && entry.3 < current.3)
760        }) {
761            best = Some(entry);
762        }
763    }
764
765    best.map(|item| item.4).unwrap_or_else(|| "\n".to_owned())
766}
767
768fn render_full_result_section(
769    graph: &GraphFile,
770    query: &str,
771    nodes: &[ScoredNode<'_>],
772    total_available: usize,
773    debug_score: bool,
774) -> String {
775    let mut lines = vec![render_result_header(query, nodes.len(), total_available)];
776    for node in nodes {
777        lines.push(render_scored_node_block(
778            graph,
779            node,
780            true,
781            debug_score,
782            Some(query),
783        ));
784    }
785    push_limit_omission_line(&mut lines, nodes.len(), total_available);
786    lines.join("\n")
787}
788
789fn render_result_header(query: &str, shown: usize, total: usize) -> String {
790    let query = escape_cli_text(query);
791    if shown < total {
792        format!("? {query} ({shown}/{total})")
793    } else {
794        format!("? {query} ({total})")
795    }
796}
797
/// Appends `... N more nodes omitted by limit` when `total` exceeds `shown`;
/// leaves `lines` untouched otherwise.
fn push_limit_omission_line(lines: &mut Vec<String>, shown: usize, total: usize) {
    if total <= shown {
        return;
    }
    let omitted = total - shown;
    lines.push(format!("... {omitted} more nodes omitted by limit"));
}
804
/// Returns `true` when `rendered` has at most `target` characters (Unicode scalar
/// values, not bytes).
fn fits_target_chars(rendered: &str, target: usize) -> bool {
    // A character at zero-based position `target` would be the (target + 1)-th one.
    rendered.chars().nth(target).is_none()
}
808
809fn render_single_node_candidate(
810    graph: &GraphFile,
811    node: &Node,
812    depth: usize,
813    detail: DetailLevel,
814    edge_cap: usize,
815) -> String {
816    let lines = render_single_node_candidate_lines(graph, node, depth, detail, edge_cap, None);
817    format!("{}\n", lines.join("\n"))
818}
819
820fn render_single_result_candidate(
821    graph: &GraphFile,
822    query: &str,
823    header: &str,
824    node: &ScoredNode<'_>,
825    total_available: usize,
826    depth: usize,
827    detail: DetailLevel,
828    edge_cap: usize,
829    full: bool,
830    debug_score: bool,
831) -> String {
832    let mut lines = vec![header.to_owned()];
833    if full {
834        lines.push(render_scored_node_block(
835            graph,
836            node,
837            true,
838            debug_score,
839            Some(query),
840        ));
841    } else {
842        lines.extend(render_scored_node_candidate_lines(
843            graph,
844            query,
845            node,
846            depth,
847            detail,
848            edge_cap,
849            debug_score,
850        ));
851    }
852    push_limit_omission_line(&mut lines, 1, total_available);
853    format!("{}\n", lines.join("\n"))
854}
855
856fn render_single_node_candidate_lines(
857    graph: &GraphFile,
858    node: &Node,
859    depth: usize,
860    detail: DetailLevel,
861    edge_cap: usize,
862    query: Option<&str>,
863) -> Vec<String> {
864    let mut lines = render_node_lines_with_edges(graph, node, detail, edge_cap, query);
865    if depth > 0 {
866        lines.extend(render_neighbor_layers(graph, node, depth, detail));
867    }
868    lines
869}
870
871fn render_scored_node_candidate_lines(
872    graph: &GraphFile,
873    query: &str,
874    node: &ScoredNode<'_>,
875    depth: usize,
876    detail: DetailLevel,
877    edge_cap: usize,
878    debug_score: bool,
879) -> Vec<String> {
880    let mut lines = vec![format!("score: {}", node.score)];
881    if debug_score {
882        lines.push(render_score_debug_line(node));
883    }
884    lines.extend(render_single_node_candidate_lines(
885        graph,
886        node.node,
887        depth,
888        detail,
889        edge_cap,
890        Some(query),
891    ));
892    lines
893}
894
895fn render_scored_node_block(
896    graph: &GraphFile,
897    node: &ScoredNode<'_>,
898    full: bool,
899    debug_score: bool,
900    query: Option<&str>,
901) -> String {
902    if debug_score {
903        format!(
904            "score: {}\n{}\n{}",
905            node.score,
906            render_score_debug_line(node),
907            render_node_block_with_query(graph, node.node, full, query)
908        )
909    } else {
910        format!(
911            "score: {}\n{}",
912            node.score,
913            render_node_block_with_query(graph, node.node, full, query)
914        )
915    }
916}
917
918fn render_score_debug_line(node: &ScoredNode<'_>) -> String {
919    format!(
920        "score_debug: raw_relevance={:.3} normalized_relevance={} lexical_boost={} feedback_boost={} importance_boost={} authority_raw={} authority_applied={} authority_cap={}",
921        node.breakdown.raw_relevance,
922        node.breakdown.normalized_relevance,
923        node.breakdown.lexical_boost,
924        node.breakdown.feedback_boost,
925        node.breakdown.importance_boost,
926        node.breakdown.authority_raw,
927        node.breakdown.authority_applied,
928        node.breakdown.authority_cap,
929    )
930}
931
932fn render_neighbor_layers(
933    graph: &GraphFile,
934    root: &Node,
935    max_depth: usize,
936    detail: DetailLevel,
937) -> Vec<String> {
938    let mut out = Vec::new();
939    let mut seen: HashSet<String> = HashSet::from([root.id.clone()]);
940    let mut queue: VecDeque<(String, usize)> = VecDeque::from([(root.id.clone(), 0usize)]);
941    let mut layers: Vec<Vec<&Node>> = vec![Vec::new(); max_depth + 1];
942
943    while let Some((node_id, depth)) = queue.pop_front() {
944        if depth >= max_depth {
945            continue;
946        }
947        for incident in incident_edges(graph, &node_id) {
948            if seen.insert(incident.related.id.clone()) {
949                let next_depth = depth + 1;
950                if next_depth <= max_depth {
951                    layers[next_depth].push(incident.related);
952                    queue.push_back((incident.related.id.clone(), next_depth));
953                }
954            }
955        }
956    }
957
958    for depth in 1..=max_depth {
959        if layers[depth].is_empty() {
960            continue;
961        }
962        let cap = match detail {
963            DetailLevel::Rich => 6,
964            DetailLevel::Compact => 4,
965            DetailLevel::Minimal => 3,
966        };
967        let shown = layers[depth].len().min(cap);
968        out.push(format!(
969            "depth {depth}: {shown}/{} neighbors",
970            layers[depth].len()
971        ));
972        for node in layers[depth].iter().take(shown) {
973            out.extend(render_node_identity_lines(node, detail));
974        }
975        if layers[depth].len() > shown {
976            out.push(format!(
977                "... +{} more neighbors omitted",
978                layers[depth].len() - shown
979            ));
980        }
981    }
982
983    out
984}
985
986fn render_node_lines_with_edges(
987    graph: &GraphFile,
988    node: &Node,
989    detail: DetailLevel,
990    edge_cap: usize,
991    query: Option<&str>,
992) -> Vec<String> {
993    let mut lines = render_node_identity_lines(node, detail);
994    lines.extend(render_node_link_lines(graph, node, edge_cap, query));
995    lines
996}
997
998fn render_node_identity_lines(node: &Node, detail: DetailLevel) -> Vec<String> {
999    let mut lines = Vec::new();
1000    match detail {
1001        DetailLevel::Rich => {
1002            lines.push(format!(
1003                "# {} | {} [{}]",
1004                node.id,
1005                escape_cli_text(&node.name),
1006                node.r#type
1007            ));
1008            if !node.properties.alias.is_empty() {
1009                lines.push(format!(
1010                    "aka: {}",
1011                    node.properties
1012                        .alias
1013                        .iter()
1014                        .map(|alias| escape_cli_text(alias))
1015                        .collect::<Vec<_>>()
1016                        .join(", ")
1017                ));
1018            }
1019            push_description_line(&mut lines, &node.properties.description, None);
1020            let shown_facts = node.properties.key_facts.len().min(3);
1021            for fact in node.properties.key_facts.iter().take(shown_facts) {
1022                lines.push(format!("- {}", escape_cli_text(fact)));
1023            }
1024            let omitted = node.properties.key_facts.len().saturating_sub(shown_facts);
1025            if omitted > 0 {
1026                lines.push(format!("... {omitted} more facts omitted"));
1027            }
1028        }
1029        DetailLevel::Compact => {
1030            lines.push(format!(
1031                "# {} | {} [{}]",
1032                node.id,
1033                escape_cli_text(&node.name),
1034                node.r#type
1035            ));
1036            push_description_line(&mut lines, &node.properties.description, Some(140));
1037            if let Some(fact) = node.properties.key_facts.first() {
1038                lines.push(format!("- {}", escape_cli_text(fact)));
1039            }
1040        }
1041        DetailLevel::Minimal => {
1042            lines.push(format!(
1043                "# {} | {} [{}]",
1044                node.id,
1045                escape_cli_text(&node.name),
1046                node.r#type
1047            ));
1048        }
1049    }
1050    lines
1051}
1052
1053fn render_node_link_lines(
1054    graph: &GraphFile,
1055    node: &Node,
1056    edge_cap: usize,
1057    query: Option<&str>,
1058) -> Vec<String> {
1059    let mut incident = incident_edges(graph, &node.id);
1060    if let Some(query) = query {
1061        let query_terms = text_norm::expand_query_terms(query);
1062        if !query_terms.is_empty() {
1063            incident.sort_by(|left, right| {
1064                let right_relevance = incident_edge_query_relevance(right, &query_terms);
1065                let left_relevance = incident_edge_query_relevance(left, &query_terms);
1066                right_relevance
1067                    .cmp(&left_relevance)
1068                    .then_with(|| incident_edge_default_cmp(left, right))
1069            });
1070        }
1071    }
1072    if incident.is_empty() {
1073        return Vec::new();
1074    }
1075
1076    let mut lines = Vec::new();
1077    if incident.len() > 12 {
1078        lines.push(format!("links: {} total", incident.len()));
1079        let (out_summary, in_summary) = summarize_relations(&incident);
1080        if !out_summary.is_empty() {
1081            lines.push(format!("out: {out_summary}"));
1082        }
1083        if !in_summary.is_empty() {
1084            lines.push(format!("in: {in_summary}"));
1085        }
1086    }
1087
1088    let shown = incident.len().min(edge_cap);
1089    for edge in incident.into_iter().take(shown) {
1090        let prefix = if edge.incoming { "<-" } else { "->" };
1091        lines.extend(render_edge_lines(prefix, edge.edge, edge.related, false));
1092    }
1093    if edge_cap > 0 && incident_count(graph, &node.id) > shown {
1094        lines.push(format!(
1095            "... {} more links omitted",
1096            incident_count(graph, &node.id) - shown
1097        ));
1098    }
1099    lines
1100}
1101
1102fn incident_count(graph: &GraphFile, node_id: &str) -> usize {
1103    graph
1104        .edges
1105        .iter()
1106        .filter(|edge| edge.source_id == node_id || edge.target_id == node_id)
1107        .count()
1108}
1109
/// An edge touching a particular node, paired with the node at its other end.
struct IncidentEdge<'a> {
    /// The underlying graph edge.
    edge: &'a Edge,
    /// The node on the opposite end of `edge` from the node being inspected.
    related: &'a Node,
    /// `true` when the inspected node is the edge's target, `false` when it is
    /// the edge's source.
    incoming: bool,
}
1115
1116fn incident_edges<'a>(graph: &'a GraphFile, node_id: &str) -> Vec<IncidentEdge<'a>> {
1117    let mut edges = Vec::new();
1118    for edge in &graph.edges {
1119        if edge.source_id == node_id {
1120            if let Some(related) = graph.node_by_id(&edge.target_id) {
1121                edges.push(IncidentEdge {
1122                    edge,
1123                    related,
1124                    incoming: false,
1125                });
1126            }
1127        } else if edge.target_id == node_id {
1128            if let Some(related) = graph.node_by_id(&edge.source_id) {
1129                edges.push(IncidentEdge {
1130                    edge,
1131                    related,
1132                    incoming: true,
1133                });
1134            }
1135        }
1136    }
1137    edges.sort_by(incident_edge_default_cmp);
1138    edges
1139}
1140
1141fn incident_edge_default_cmp(
1142    left: &IncidentEdge<'_>,
1143    right: &IncidentEdge<'_>,
1144) -> std::cmp::Ordering {
1145    right
1146        .related
1147        .properties
1148        .importance
1149        .partial_cmp(&left.related.properties.importance)
1150        .unwrap_or(std::cmp::Ordering::Equal)
1151        .then_with(|| left.edge.relation.cmp(&right.edge.relation))
1152        .then_with(|| left.related.id.cmp(&right.related.id))
1153}
1154
1155fn incident_edge_query_relevance(edge: &IncidentEdge<'_>, query_terms: &[String]) -> i64 {
1156    if query_terms.is_empty() {
1157        return 0;
1158    }
1159    let related = edge.related;
1160    let mut score = 0;
1161    score += query_overlap_score(&related.id, query_terms, 6);
1162    score += query_overlap_score(&related.name, query_terms, 5);
1163    score += query_overlap_score(&related.properties.description, query_terms, 2);
1164    score += query_overlap_score(&edge.edge.relation, query_terms, 2);
1165    score += query_overlap_score(&edge.edge.properties.detail, query_terms, 2);
1166    for alias in &related.properties.alias {
1167        score += query_overlap_score(alias, query_terms, 4);
1168    }
1169    score
1170}
1171
1172fn query_overlap_score(value: &str, query_terms: &[String], weight: i64) -> i64 {
1173    if value.is_empty() || query_terms.is_empty() {
1174        return 0;
1175    }
1176    let value_terms: HashSet<String> = tokenize(value).into_iter().collect();
1177    if value_terms.is_empty() {
1178        return 0;
1179    }
1180    let matches = query_terms
1181        .iter()
1182        .filter(|term| value_terms.contains(term.as_str()))
1183        .count() as i64;
1184    matches * weight
1185}
1186
1187fn summarize_relations(edges: &[IncidentEdge<'_>]) -> (String, String) {
1188    let mut out: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
1189    let mut incoming: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
1190
1191    for edge in edges {
1192        let bucket = if edge.incoming {
1193            &mut incoming
1194        } else {
1195            &mut out
1196        };
1197        *bucket.entry(edge.edge.relation.clone()).or_insert(0) += 1;
1198    }
1199
1200    (join_relation_counts(&out), join_relation_counts(&incoming))
1201}
1202
/// Formats the first three entries of `counts` (in the map's key order) as
/// `relation xN`, separated by `", "`. Returns an empty string for an empty map.
fn join_relation_counts(counts: &std::collections::BTreeMap<String, usize>) -> String {
    let mut summary = String::new();
    for (relation, count) in counts.iter().take(3) {
        // Every rendered entry is non-empty, so an empty buffer means "first".
        if !summary.is_empty() {
            summary.push_str(", ");
        }
        summary.push_str(&format!("{relation} x{count}"));
    }
    summary
}
1211
1212fn render_node_block(graph: &GraphFile, node: &Node, full: bool) -> String {
1213    render_node_block_with_query(graph, node, full, None)
1214}
1215
1216fn render_node_block_with_query(
1217    graph: &GraphFile,
1218    node: &Node,
1219    full: bool,
1220    query: Option<&str>,
1221) -> String {
1222    let mut lines = Vec::new();
1223    lines.push(format!(
1224        "# {} | {} [{}]",
1225        node.id,
1226        escape_cli_text(&node.name),
1227        node.r#type
1228    ));
1229
1230    if !node.properties.alias.is_empty() {
1231        lines.push(format!(
1232            "aka: {}",
1233            node.properties
1234                .alias
1235                .iter()
1236                .map(|alias| escape_cli_text(alias))
1237                .collect::<Vec<_>>()
1238                .join(", ")
1239        ));
1240    }
1241    push_description_line(
1242        &mut lines,
1243        &node.properties.description,
1244        if full { None } else { Some(200) },
1245    );
1246    if full {
1247        if !node.properties.domain_area.is_empty() {
1248            lines.push(format!(
1249                "domain_area: {}",
1250                escape_cli_text(&node.properties.domain_area)
1251            ));
1252        }
1253        if !node.properties.provenance.is_empty() {
1254            lines.push(format!(
1255                "provenance: {}",
1256                escape_cli_text(&node.properties.provenance)
1257            ));
1258        }
1259        if let Some(confidence) = node.properties.confidence {
1260            lines.push(format!("confidence: {confidence}"));
1261        }
1262        lines.push(format!("importance: {}", node.properties.importance));
1263        if !node.properties.created_at.is_empty() {
1264            lines.push(format!("created_at: {}", node.properties.created_at));
1265        }
1266    }
1267
1268    let facts_to_show = if full {
1269        node.properties.key_facts.len()
1270    } else {
1271        node.properties.key_facts.len().min(2)
1272    };
1273    for fact in node.properties.key_facts.iter().take(facts_to_show) {
1274        lines.push(format!("- {}", escape_cli_text(fact)));
1275    }
1276    let omitted = node
1277        .properties
1278        .key_facts
1279        .len()
1280        .saturating_sub(facts_to_show);
1281    if omitted > 0 {
1282        lines.push(format!("... {omitted} more facts omitted"));
1283    }
1284
1285    if full {
1286        if !node.source_files.is_empty() {
1287            lines.push(format!(
1288                "sources: {}",
1289                node.source_files
1290                    .iter()
1291                    .map(|source| escape_cli_text(source))
1292                    .collect::<Vec<_>>()
1293                    .join(", ")
1294            ));
1295        }
1296        push_feedback_lines(
1297            &mut lines,
1298            node.properties.feedback_score,
1299            node.properties.feedback_count,
1300            node.properties.feedback_last_ts_ms,
1301            None,
1302        );
1303    }
1304
1305    let attached_notes: Vec<_> = graph
1306        .notes
1307        .iter()
1308        .filter(|note| note.node_id == node.id)
1309        .collect();
1310    if full && !attached_notes.is_empty() {
1311        lines.push(format!("notes: {}", attached_notes.len()));
1312        for note in attached_notes {
1313            lines.extend(render_attached_note_lines(note));
1314        }
1315    }
1316
1317    for edge in outgoing_edges(graph, &node.id, full, query) {
1318        if let Some(target) = graph.node_by_id(&edge.target_id) {
1319            lines.extend(render_edge_lines("->", edge, target, full));
1320        }
1321    }
1322    for edge in incoming_edges(graph, &node.id, full, query) {
1323        if let Some(source) = graph.node_by_id(&edge.source_id) {
1324            lines.extend(render_edge_lines("<-", edge, source, full));
1325        }
1326    }
1327
1328    lines.join("\n")
1329}
1330
1331fn outgoing_edges<'a>(
1332    graph: &'a GraphFile,
1333    node_id: &str,
1334    full: bool,
1335    query: Option<&str>,
1336) -> Vec<&'a Edge> {
1337    let mut edges: Vec<&Edge> = graph
1338        .edges
1339        .iter()
1340        .filter(|edge| edge.source_id == node_id)
1341        .collect();
1342    if let Some(query) = query {
1343        let query_terms = text_norm::expand_query_terms(query);
1344        if !query_terms.is_empty() {
1345            edges.sort_by(|left, right| {
1346                let right_score = directed_edge_query_relevance(graph, right, false, &query_terms);
1347                let left_score = directed_edge_query_relevance(graph, left, false, &query_terms);
1348                right_score
1349                    .cmp(&left_score)
1350                    .then_with(|| left.relation.cmp(&right.relation))
1351                    .then_with(|| left.target_id.cmp(&right.target_id))
1352            });
1353        } else {
1354            edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1355        }
1356    } else {
1357        edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1358    }
1359    if !full {
1360        edges.truncate(3);
1361    }
1362    edges
1363}
1364
1365fn incoming_edges<'a>(
1366    graph: &'a GraphFile,
1367    node_id: &str,
1368    full: bool,
1369    query: Option<&str>,
1370) -> Vec<&'a Edge> {
1371    let mut edges: Vec<&Edge> = graph
1372        .edges
1373        .iter()
1374        .filter(|edge| edge.target_id == node_id)
1375        .collect();
1376    if let Some(query) = query {
1377        let query_terms = text_norm::expand_query_terms(query);
1378        if !query_terms.is_empty() {
1379            edges.sort_by(|left, right| {
1380                let right_score = directed_edge_query_relevance(graph, right, true, &query_terms);
1381                let left_score = directed_edge_query_relevance(graph, left, true, &query_terms);
1382                right_score
1383                    .cmp(&left_score)
1384                    .then_with(|| left.relation.cmp(&right.relation))
1385                    .then_with(|| left.source_id.cmp(&right.source_id))
1386            });
1387        } else {
1388            edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1389        }
1390    } else {
1391        edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1392    }
1393    if !full {
1394        edges.truncate(3);
1395    }
1396    edges
1397}
1398
1399fn directed_edge_query_relevance(
1400    graph: &GraphFile,
1401    edge: &Edge,
1402    incoming: bool,
1403    query_terms: &[String],
1404) -> i64 {
1405    let related = if incoming {
1406        graph.node_by_id(&edge.source_id)
1407    } else {
1408        graph.node_by_id(&edge.target_id)
1409    };
1410    let mut score = query_overlap_score(&edge.relation, query_terms, 2)
1411        + query_overlap_score(&edge.properties.detail, query_terms, 2);
1412    if let Some(node) = related {
1413        score += query_overlap_score(&node.id, query_terms, 6);
1414        score += query_overlap_score(&node.name, query_terms, 5);
1415        score += query_overlap_score(&node.properties.description, query_terms, 2);
1416        for alias in &node.properties.alias {
1417            score += query_overlap_score(alias, query_terms, 4);
1418        }
1419    }
1420    score
1421}
1422
1423fn render_edge_lines(prefix: &str, edge: &Edge, related: &Node, full: bool) -> Vec<String> {
1424    let (arrow, relation) = if edge.relation.starts_with("NOT_") {
1425        (
1426            format!("{prefix}!"),
1427            edge.relation.trim_start_matches("NOT_"),
1428        )
1429    } else {
1430        (prefix.to_owned(), edge.relation.as_str())
1431    };
1432
1433    let mut line = format!(
1434        "{arrow} {relation} | {} | {}",
1435        related.id,
1436        escape_cli_text(&related.name)
1437    );
1438    if !edge.properties.detail.is_empty() {
1439        line.push_str(" | ");
1440        let detail = escape_cli_text(&edge.properties.detail);
1441        if full {
1442            line.push_str(&detail);
1443        } else {
1444            line.push_str(&truncate(&detail, 80));
1445        }
1446    }
1447    let mut lines = vec![line];
1448    if full {
1449        push_feedback_lines(
1450            &mut lines,
1451            edge.properties.feedback_score,
1452            edge.properties.feedback_count,
1453            edge.properties.feedback_last_ts_ms,
1454            Some("edge_"),
1455        );
1456        if !edge.properties.valid_from.is_empty() {
1457            lines.push(format!("edge_valid_from: {}", edge.properties.valid_from));
1458        }
1459        if !edge.properties.valid_to.is_empty() {
1460            lines.push(format!("edge_valid_to: {}", edge.properties.valid_to));
1461        }
1462    }
1463    lines
1464}
1465
/// Shortens `value` to at most `max_len` characters (Unicode scalar values).
///
/// Values that already fit are returned unchanged. Longer values are cut on a
/// character boundary and end in `...`, with the ellipsis counted against
/// `max_len`. When `max_len` is smaller than the ellipsis itself, the result
/// is the ellipsis cut down to `max_len` dots, so the returned string never
/// exceeds the requested length.
fn truncate(value: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    // A character at index `max_len` exists only if the value is too long;
    // this stops scanning as soon as the limit is reached.
    if value.char_indices().nth(max_len).is_none() {
        return value.to_owned();
    }
    if max_len < ELLIPSIS.len() {
        // The ellipsis is ASCII, so byte slicing is also character slicing.
        return ELLIPSIS[..max_len].to_owned();
    }

    let keep_chars = max_len - ELLIPSIS.len();
    let cut = value
        .char_indices()
        .nth(keep_chars)
        .map_or(value.len(), |(byte_index, _)| byte_index);

    let mut shortened = String::with_capacity(cut + ELLIPSIS.len());
    shortened.push_str(&value[..cut]);
    shortened.push_str(ELLIPSIS);
    shortened
}
1474
/// Escapes `value` so it stays on a single output line.
///
/// Backslash, newline, carriage return and tab are replaced by the two
/// character sequences `\\`, `\n`, `\r` and `\t`; every other character is
/// copied through unchanged.
fn escape_cli_text(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            other => {
                escaped.push(other);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
1488
1489fn push_description_line(lines: &mut Vec<String>, description: &str, max_len: Option<usize>) {
1490    if description.is_empty() {
1491        return;
1492    }
1493    let escaped = escape_cli_text(description);
1494    let rendered = match max_len {
1495        Some(limit) => truncate(&escaped, limit),
1496        None => escaped,
1497    };
1498    lines.push(format!("desc: {rendered}"));
1499}
1500
/// Appends feedback fields to `lines`, skipping fields that carry no data.
///
/// Emits, in this order: `feedback_score` when `score` is not equal to zero,
/// `feedback_count` when `count` is non-zero, and `feedback_last_ts_ms` when
/// a timestamp is present. Each key is prepended with `prefix` if one is given.
fn push_feedback_lines(
    lines: &mut Vec<String>,
    score: f64,
    count: u64,
    last_ts_ms: Option<u64>,
    prefix: Option<&str>,
) {
    let tag = prefix.unwrap_or_default();

    let score_line = if score != 0.0 {
        Some(format!("{tag}feedback_score: {score}"))
    } else {
        None
    };
    let count_line = if count != 0 {
        Some(format!("{tag}feedback_count: {count}"))
    } else {
        None
    };
    let timestamp_line = last_ts_ms.map(|ts| format!("{tag}feedback_last_ts_ms: {ts}"));

    lines.extend(
        score_line
            .into_iter()
            .chain(count_line)
            .chain(timestamp_line),
    );
}
1519
1520fn render_attached_note_lines(note: &crate::graph::Note) -> Vec<String> {
1521    let mut lines = vec![format!("! {}", note.id)];
1522    if !note.body.is_empty() {
1523        lines.push(format!("note_body: {}", escape_cli_text(&note.body)));
1524    }
1525    if !note.tags.is_empty() {
1526        lines.push(format!(
1527            "note_tags: {}",
1528            note.tags
1529                .iter()
1530                .map(|tag| escape_cli_text(tag))
1531                .collect::<Vec<_>>()
1532                .join(", ")
1533        ));
1534    }
1535    if !note.author.is_empty() {
1536        lines.push(format!("note_author: {}", escape_cli_text(&note.author)));
1537    }
1538    if !note.created_at.is_empty() {
1539        lines.push(format!("note_created_at: {}", note.created_at));
1540    }
1541    if !note.provenance.is_empty() {
1542        lines.push(format!(
1543            "note_provenance: {}",
1544            escape_cli_text(&note.provenance)
1545        ));
1546    }
1547    if !note.source_files.is_empty() {
1548        lines.push(format!(
1549            "note_sources: {}",
1550            note.source_files
1551                .iter()
1552                .map(|source| escape_cli_text(source))
1553                .collect::<Vec<_>>()
1554                .join(", ")
1555        ));
1556    }
1557    lines
1558}
1559
1560fn find_matches_with_index<'a>(
1561    graph: &'a GraphFile,
1562    query: &str,
1563    limit: usize,
1564    include_features: bool,
1565    include_metadata: bool,
1566    mode: FindMode,
1567    index: Option<&Bm25Index>,
1568    tune: Option<&FindTune>,
1569) -> Vec<ScoredNode<'a>> {
1570    let mut matches = find_all_matches_with_index(
1571        graph,
1572        query,
1573        include_features,
1574        include_metadata,
1575        mode,
1576        index,
1577        tune,
1578    );
1579    matches.truncate(limit);
1580    matches
1581}
1582
1583fn find_all_matches_with_index<'a>(
1584    graph: &'a GraphFile,
1585    query: &str,
1586    include_features: bool,
1587    include_metadata: bool,
1588    mode: FindMode,
1589    index: Option<&Bm25Index>,
1590    tune: Option<&FindTune>,
1591) -> Vec<ScoredNode<'a>> {
1592    let context = FindQueryContext::build(graph);
1593    let rewritten_query = rewrite_query(query);
1594    let fuzzy_query = if rewritten_query.is_empty() {
1595        query.to_owned()
1596    } else {
1597        rewritten_query
1598    };
1599    let mut scored: Vec<ScoredNode<'a>> = match mode {
1600        FindMode::Fuzzy => {
1601            let pattern = Pattern::parse(&fuzzy_query, CaseMatching::Ignore, Normalization::Smart);
1602            let mut matcher = Matcher::new(Config::DEFAULT);
1603            let candidates = graph
1604                .nodes
1605                .iter()
1606                .filter(|node| node_is_searchable(node, include_features, include_metadata))
1607                .filter_map(|node| {
1608                    score_node(&context, node, &fuzzy_query, &pattern, &mut matcher).map(|score| {
1609                        RawCandidate {
1610                            node,
1611                            raw_relevance: score as f64,
1612                            lexical_boost: 0,
1613                        }
1614                    })
1615                })
1616                .collect();
1617            compose_scores(candidates)
1618        }
1619        FindMode::Bm25 => compose_scores(score_bm25_raw(
1620            graph,
1621            &context,
1622            &fuzzy_query,
1623            include_features,
1624            include_metadata,
1625            index,
1626        )),
1627        FindMode::Hybrid => compose_scores(score_hybrid_raw(
1628            graph,
1629            &context,
1630            &fuzzy_query,
1631            include_features,
1632            include_metadata,
1633            index,
1634            tune.copied().unwrap_or_default(),
1635        )),
1636    };
1637
1638    scored.sort_by(|left, right| {
1639        right
1640            .score
1641            .cmp(&left.score)
1642            .then_with(|| left.node.id.cmp(&right.node.id))
1643    });
1644    let mut seen_ids = HashSet::new();
1645    scored.retain(|item| {
1646        let key = crate::validate::normalize_node_id(&item.node.id).to_ascii_lowercase();
1647        seen_ids.insert(key)
1648    });
1649    scored
1650}
1651
1652fn compose_scores<'a>(candidates: Vec<RawCandidate<'a>>) -> Vec<ScoredNode<'a>> {
1653    let max_raw = candidates
1654        .iter()
1655        .map(|candidate| candidate.raw_relevance)
1656        .fold(0.0f64, f64::max);
1657    let max_raw_log = max_raw.ln_1p();
1658
1659    candidates
1660        .into_iter()
1661        .filter_map(|candidate| {
1662            if candidate.raw_relevance <= 0.0 {
1663                return None;
1664            }
1665            let normalized_relevance = if max_raw_log > 0.0 {
1666                ((candidate.raw_relevance.ln_1p() / max_raw_log) * 1000.0).round() as i64
1667            } else {
1668                0
1669            };
1670            let feedback = feedback_boost(candidate.node);
1671            let importance = importance_boost(candidate.node);
1672            let authority_raw = feedback + importance;
1673            let relative_cap =
1674                ((normalized_relevance as f64) * SCORE_META_MAX_RATIO).round() as i64;
1675            let authority_cap = relative_cap.max(SCORE_META_MIN_CAP).min(SCORE_META_MAX_CAP);
1676            let authority_applied = authority_raw.clamp(-authority_cap, authority_cap);
1677            let final_score = normalized_relevance + authority_applied;
1678
1679            Some(ScoredNode {
1680                score: final_score,
1681                node: candidate.node,
1682                breakdown: ScoreBreakdown {
1683                    raw_relevance: candidate.raw_relevance,
1684                    normalized_relevance,
1685                    lexical_boost: candidate.lexical_boost,
1686                    feedback_boost: feedback,
1687                    importance_boost: importance,
1688                    authority_raw,
1689                    authority_applied,
1690                    authority_cap,
1691                },
1692            })
1693        })
1694        .collect()
1695}
1696
1697fn feedback_boost(node: &Node) -> i64 {
1698    let count = node.properties.feedback_count as f64;
1699    if count <= 0.0 {
1700        return 0;
1701    }
1702    let avg = node.properties.feedback_score / count;
1703    let confidence = (count.ln_1p() / 3.0).min(1.0);
1704    let scaled = avg * 200.0 * confidence;
1705    scaled.clamp(-300.0, 300.0).round() as i64
1706}
1707
1708fn importance_boost(node: &Node) -> i64 {
1709    let normalized_importance = if (0.0..=1.0).contains(&node.properties.importance) {
1710        node.properties.importance
1711    } else if (1.0..=6.0).contains(&node.properties.importance) {
1712        (node.properties.importance - 1.0) / 5.0
1713    } else {
1714        node.properties.importance.clamp(0.0, 1.0)
1715    };
1716    let normalized = (normalized_importance - IMPORTANCE_NEUTRAL) * 2.0;
1717    (normalized * IMPORTANCE_MAX_ABS_BOOST).round() as i64
1718}
1719
1720fn score_bm25_raw<'a>(
1721    graph: &'a GraphFile,
1722    context: &FindQueryContext<'a>,
1723    query: &str,
1724    include_features: bool,
1725    include_metadata: bool,
1726    index: Option<&Bm25Index>,
1727) -> Vec<RawCandidate<'a>> {
1728    let terms = text_norm::expand_query_terms(query);
1729    if terms.is_empty() {
1730        return Vec::new();
1731    }
1732
1733    if let Some(idx) = index {
1734        let results = idx.search(&terms, graph);
1735        return results
1736            .into_iter()
1737            .filter_map(|(node_id, score)| {
1738                let node = graph.node_by_id(&node_id)?;
1739                if !node_is_searchable(node, include_features, include_metadata) {
1740                    return None;
1741                }
1742                let self_terms = node_self_document_terms(context, node);
1743                let neighbor_score =
1744                    best_neighbor_bm25_score_with_index(context, node, &terms, idx);
1745                let base_score = combine_bm25_components(node, score as f64, neighbor_score);
1746                if base_score <= 0.0 {
1747                    return None;
1748                }
1749                let lexical_boost = bm25_lexical_boost_with_idf(&terms, &self_terms, |term| {
1750                    idx.idf.get(term).copied().unwrap_or(0.0) as f64
1751                });
1752                let proximity_boost = bm25_proximity_boost(context, node, &terms);
1753                Some(RawCandidate {
1754                    node,
1755                    raw_relevance: base_score * 100.0
1756                        + lexical_boost as f64
1757                        + proximity_boost as f64,
1758                    lexical_boost: lexical_boost + proximity_boost,
1759                })
1760            })
1761            .collect();
1762    }
1763
1764    let docs: Vec<(&'a Node, Vec<String>)> = graph
1765        .nodes
1766        .iter()
1767        .filter(|node| node_is_searchable(node, include_features, include_metadata))
1768        .map(|node| (node, node_self_document_terms(context, node)))
1769        .collect();
1770
1771    if docs.is_empty() {
1772        return Vec::new();
1773    }
1774
1775    let mut df: HashMap<String, usize> = HashMap::new();
1776    for term in &terms {
1777        let mut count = 0usize;
1778        for (_, tokens) in &docs {
1779            if tokens.iter().any(|t| t == term) {
1780                count += 1;
1781            }
1782        }
1783        df.insert(term.clone(), count);
1784    }
1785
1786    let total_docs = docs.len() as f64;
1787    let avgdl = docs
1788        .iter()
1789        .map(|(_, tokens)| tokens.len() as f64)
1790        .sum::<f64>()
1791        / total_docs.max(1.0);
1792
1793    let mut idf_by_term: HashMap<String, f64> = HashMap::new();
1794    for term in &terms {
1795        let df_t = *df.get(term).unwrap_or(&0) as f64;
1796        let idf = (1.0 + (total_docs - df_t + 0.5) / (df_t + 0.5)).ln();
1797        idf_by_term.insert(term.clone(), idf);
1798    }
1799
1800    let mut scored = Vec::new();
1801
1802    for (node, self_terms) in docs {
1803        let self_score = bm25_document_score(&terms, &self_terms, &idf_by_term, avgdl);
1804        let neighbor_score = best_neighbor_bm25_score(context, node, &terms, &idf_by_term, avgdl);
1805        let base_score = combine_bm25_components(node, self_score, neighbor_score);
1806        if base_score <= 0.0 {
1807            continue;
1808        }
1809        let lexical_boost = bm25_lexical_boost_with_idf(&terms, &self_terms, |term| {
1810            idf_by_term.get(term).copied().unwrap_or(0.0)
1811        });
1812        let proximity_boost = bm25_proximity_boost(context, node, &terms);
1813        scored.push(RawCandidate {
1814            node,
1815            raw_relevance: base_score * 100.0 + lexical_boost as f64 + proximity_boost as f64,
1816            lexical_boost: lexical_boost + proximity_boost,
1817        });
1818    }
1819
1820    scored
1821}
1822
1823fn score_hybrid_raw<'a>(
1824    graph: &'a GraphFile,
1825    context: &FindQueryContext<'a>,
1826    query: &str,
1827    include_features: bool,
1828    include_metadata: bool,
1829    index: Option<&Bm25Index>,
1830    tune: FindTune,
1831) -> Vec<RawCandidate<'a>> {
1832    let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
1833    let mut matcher = Matcher::new(Config::DEFAULT);
1834
1835    let mut fuzzy_raw = HashMap::new();
1836    for node in graph
1837        .nodes
1838        .iter()
1839        .filter(|node| node_is_searchable(node, include_features, include_metadata))
1840    {
1841        if let Some(score) = score_node(context, node, query, &pattern, &mut matcher) {
1842            fuzzy_raw.insert(node.id.as_str(), score as f64);
1843        }
1844    }
1845
1846    let bm25_candidates = score_bm25_raw(
1847        graph,
1848        context,
1849        query,
1850        include_features,
1851        include_metadata,
1852        index,
1853    );
1854    let mut bm25_raw = HashMap::new();
1855    let mut lexical_boost = HashMap::new();
1856    for candidate in bm25_candidates {
1857        bm25_raw.insert(candidate.node.id.as_str(), candidate.raw_relevance);
1858        lexical_boost.insert(candidate.node.id.as_str(), candidate.lexical_boost);
1859    }
1860
1861    let fuzzy_norm = normalize_raw_scores(&fuzzy_raw);
1862    let bm25_norm = normalize_raw_scores(&bm25_raw);
1863    let total_weight = (tune.bm25 + tune.fuzzy).max(0.0001);
1864
1865    graph
1866        .nodes
1867        .iter()
1868        .filter(|node| node_is_searchable(node, include_features, include_metadata))
1869        .filter_map(|node| {
1870            let f = fuzzy_norm.get(node.id.as_str()).copied().unwrap_or(0.0);
1871            let b = bm25_norm.get(node.id.as_str()).copied().unwrap_or(0.0);
1872            let combined = ((tune.fuzzy * f) + (tune.bm25 * b)) / total_weight;
1873            if combined <= 0.0 {
1874                return None;
1875            }
1876            Some(RawCandidate {
1877                node,
1878                raw_relevance: combined * 1000.0,
1879                lexical_boost: lexical_boost.get(node.id.as_str()).copied().unwrap_or(0),
1880            })
1881        })
1882        .collect()
1883}
1884
/// Log-normalizes raw scores against the largest score in the map.
///
/// Each value becomes `ln(1 + value) / ln(1 + max)`, clamped to 0..=1. When
/// the largest value is not positive, every entry maps to 0.
fn normalize_raw_scores<'a>(raw: &'a HashMap<&'a str, f64>) -> HashMap<&'a str, f64> {
    let mut peak = 0.0f64;
    for &value in raw.values() {
        peak = peak.max(value);
    }
    let peak_log = peak.ln_1p();

    let mut normalized = HashMap::with_capacity(raw.len());
    for (&id, &value) in raw {
        let ratio = if peak_log > 0.0 {
            value.ln_1p() / peak_log
        } else {
            0.0
        };
        normalized.insert(id, ratio.clamp(0.0, 1.0));
    }
    normalized
}
1899
1900fn node_is_searchable(node: &Node, include_features: bool, include_metadata: bool) -> bool {
1901    (include_features || node.r#type != "Feature") && (include_metadata || node.r#type != "^")
1902}
1903
1904fn node_self_document_terms(context: &FindQueryContext<'_>, node: &Node) -> Vec<String> {
1905    let mut tokens = Vec::new();
1906    push_terms(&mut tokens, &node.id, BM25_ID_WEIGHT);
1907    push_terms(&mut tokens, &node.name, BM25_NAME_WEIGHT);
1908    push_terms(
1909        &mut tokens,
1910        &node.properties.description,
1911        BM25_DESCRIPTION_WEIGHT,
1912    );
1913    for alias in &node.properties.alias {
1914        push_terms(&mut tokens, alias, BM25_ALIAS_WEIGHT);
1915    }
1916    for fact in &node.properties.key_facts {
1917        push_terms(&mut tokens, fact, BM25_FACT_WEIGHT);
1918    }
1919    for note in context.notes_for(&node.id) {
1920        push_terms(&mut tokens, &note.body, BM25_NOTE_BODY_WEIGHT);
1921        for tag in &note.tags {
1922            push_terms(&mut tokens, tag, BM25_NOTE_TAG_WEIGHT);
1923        }
1924    }
1925    tokens
1926}
1927
1928fn neighbor_document_terms(neighbor: &Node) -> Vec<String> {
1929    let mut tokens = Vec::new();
1930    push_terms(&mut tokens, &neighbor.id, BM25_NEIGHBOR_WEIGHT);
1931    push_terms(&mut tokens, &neighbor.name, BM25_NEIGHBOR_WEIGHT);
1932    push_terms(
1933        &mut tokens,
1934        &neighbor.properties.description,
1935        BM25_NEIGHBOR_WEIGHT,
1936    );
1937    for alias in &neighbor.properties.alias {
1938        push_terms(&mut tokens, alias, BM25_NEIGHBOR_WEIGHT);
1939    }
1940    tokens
1941}
1942
1943fn fact_volume_normalizer(node: &Node) -> f64 {
1944    let fact_chars = node
1945        .properties
1946        .key_facts
1947        .iter()
1948        .map(|fact| fact.chars().count())
1949        .sum::<usize>() as f64;
1950    if fact_chars <= 0.0 {
1951        return 1.0;
1952    }
1953    let scaled = FACT_VOLUME_BASE_CHARS.sqrt() / fact_chars.sqrt();
1954    scaled.clamp(FACT_VOLUME_MIN_FACTOR, 1.0)
1955}
1956
1957fn bm25_document_score(
1958    query_terms: &[String],
1959    document_terms: &[String],
1960    idf_by_term: &HashMap<String, f64>,
1961    avgdl: f64,
1962) -> f64 {
1963    if query_terms.is_empty() || document_terms.is_empty() {
1964        return 0.0;
1965    }
1966    let dl = document_terms.len() as f64;
1967    if dl <= 0.0 {
1968        return 0.0;
1969    }
1970    let mut score = 0.0;
1971    for term in query_terms {
1972        let tf = document_terms.iter().filter(|token| *token == term).count() as f64;
1973        if tf <= 0.0 {
1974            continue;
1975        }
1976        let idf = idf_by_term.get(term).copied().unwrap_or(0.0);
1977        if idf <= 0.0 {
1978            continue;
1979        }
1980        let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl.max(1.0)));
1981        score += idf * (tf * (BM25_K1 + 1.0) / denom);
1982    }
1983    score
1984}
1985
1986fn best_neighbor_bm25_score(
1987    context: &FindQueryContext<'_>,
1988    node: &Node,
1989    query_terms: &[String],
1990    idf_by_term: &HashMap<String, f64>,
1991    avgdl: f64,
1992) -> f64 {
1993    context
1994        .neighbors_for(&node.id)
1995        .iter()
1996        .map(|neighbor| {
1997            let neighbor_terms = neighbor_document_terms(neighbor);
1998            bm25_document_score(query_terms, &neighbor_terms, idf_by_term, avgdl)
1999        })
2000        .fold(0.0f64, f64::max)
2001}
2002
2003fn best_neighbor_bm25_score_with_index(
2004    context: &FindQueryContext<'_>,
2005    node: &Node,
2006    query_terms: &[String],
2007    index: &Bm25Index,
2008) -> f64 {
2009    let avgdl = index.avg_doc_len as f64;
2010    context
2011        .neighbors_for(&node.id)
2012        .iter()
2013        .map(|neighbor| {
2014            let neighbor_terms = neighbor_document_terms(neighbor);
2015            let dl = neighbor_terms.len() as f64;
2016            if dl <= 0.0 {
2017                return 0.0;
2018            }
2019            let mut score = 0.0;
2020            for term in query_terms {
2021                let idf = index.idf.get(term).copied().unwrap_or(0.0) as f64;
2022                if idf <= 0.0 {
2023                    continue;
2024                }
2025                let tf = neighbor_terms.iter().filter(|token| *token == term).count() as f64;
2026                if tf <= 0.0 {
2027                    continue;
2028                }
2029                let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl.max(1.0)));
2030                score += idf * (tf * (BM25_K1 + 1.0) / denom);
2031            }
2032            score
2033        })
2034        .fold(0.0f64, f64::max)
2035}
2036
2037fn combine_bm25_components(node: &Node, self_score: f64, neighbor_score: f64) -> f64 {
2038    let combined =
2039        BM25_SELF_CONTEXT_WEIGHT * self_score + BM25_NEIGHBOR_CONTEXT_WEIGHT * neighbor_score;
2040    combined * fact_volume_normalizer(node)
2041}
2042
2043fn push_terms(target: &mut Vec<String>, value: &str, weight: usize) {
2044    if value.is_empty() {
2045        return;
2046    }
2047    let terms = tokenize(value);
2048    for _ in 0..weight {
2049        target.extend(terms.iter().cloned());
2050    }
2051}
2052
/// Splits `text` into search tokens by delegating to `text_norm::tokenize`.
///
/// The normalisation rules live in `text_norm`; this module's tests expect
/// `"ŁÓDŹ smart-home"` to become `["łódź", "smart", "home"]`.
fn tokenize(text: &str) -> Vec<String> {
    text_norm::tokenize(text)
}
2056
2057fn rewrite_query(query: &str) -> String {
2058    text_norm::expand_query_terms(query).join(" ")
2059}
2060
2061fn bm25_lexical_boost_with_idf<F>(
2062    query_terms: &[String],
2063    document_terms: &[String],
2064    idf_for: F,
2065) -> i64
2066where
2067    F: Fn(&str) -> f64,
2068{
2069    if query_terms.is_empty() || document_terms.is_empty() {
2070        return 0;
2071    }
2072    if query_terms.len() > 1 && contains_token_phrase(document_terms, query_terms) {
2073        return BM25_PHRASE_MATCH_BOOST;
2074    }
2075    let document_vocab: HashSet<&str> = document_terms.iter().map(String::as_str).collect();
2076    let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
2077    let mut total_idf = 0.0;
2078    let mut matched_idf = 0.0;
2079    let mut matched_terms = 0i64;
2080    for term in query_vocab {
2081        let idf = idf_for(term).max(0.0);
2082        total_idf += if idf > 0.0 { idf } else { 1.0 };
2083        if document_vocab.contains(term) {
2084            matched_terms += 1;
2085            matched_idf += if idf > 0.0 { idf } else { 1.0 };
2086        }
2087    }
2088    if matched_terms == 0 {
2089        return 0;
2090    }
2091    ((matched_idf / total_idf.max(1.0)) * BM25_TOKEN_MATCH_BOOST as f64).round() as i64
2092}
2093
2094fn bm25_proximity_boost(
2095    context: &FindQueryContext<'_>,
2096    node: &Node,
2097    query_terms: &[String],
2098) -> i64 {
2099    if query_terms.len() < 2 {
2100        return 0;
2101    }
2102    let mut best_span_hits = proximity_hits_in_text(&node.id, query_terms)
2103        .max(proximity_hits_in_text(&node.name, query_terms))
2104        .max(proximity_hits_in_text(
2105            &node.properties.description,
2106            query_terms,
2107        ));
2108    for alias in &node.properties.alias {
2109        best_span_hits = best_span_hits.max(proximity_hits_in_text(alias, query_terms));
2110    }
2111    for fact in &node.properties.key_facts {
2112        best_span_hits = best_span_hits.max(proximity_hits_in_text(fact, query_terms));
2113    }
2114    for note in context.notes_for(&node.id) {
2115        best_span_hits = best_span_hits.max(proximity_hits_in_text(&note.body, query_terms));
2116        for tag in &note.tags {
2117            best_span_hits = best_span_hits.max(proximity_hits_in_text(tag, query_terms));
2118        }
2119    }
2120    if best_span_hits < 2 {
2121        0
2122    } else {
2123        BM25_PROXIMITY_MATCH_BOOST + (best_span_hits as i64 - 2) * 20
2124    }
2125}
2126
2127fn proximity_hits_in_text(value: &str, query_terms: &[String]) -> usize {
2128    if value.is_empty() || query_terms.len() < 2 {
2129        return 0;
2130    }
2131    let tokens = tokenize(value);
2132    if tokens.len() < 2 {
2133        return 0;
2134    }
2135    let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
2136    let mut best = 0usize;
2137    for start in 0..tokens.len() {
2138        let end = (start + BM25_PROXIMITY_WINDOW_TOKENS).min(tokens.len());
2139        let mut seen: HashSet<&str> = HashSet::new();
2140        for token in &tokens[start..end] {
2141            if query_vocab.contains(token.as_str()) {
2142                seen.insert(token.as_str());
2143            }
2144        }
2145        best = best.max(seen.len());
2146    }
2147    best
2148}
2149
/// Reports whether `query_terms` occurs inside `document_terms` as a
/// contiguous run of tokens, in the same order.
///
/// An empty query, or a query longer than the document, never matches.
fn contains_token_phrase(document_terms: &[String], query_terms: &[String]) -> bool {
    let phrase_len = query_terms.len();
    // The empty case must be rejected up front: `windows(0)` panics.
    if phrase_len == 0 || phrase_len > document_terms.len() {
        return false;
    }

    for window in document_terms.windows(phrase_len) {
        let same = window
            .iter()
            .zip(query_terms)
            .all(|(doc_token, query_token)| doc_token == query_token);
        if same {
            return true;
        }
    }
    false
}
2158
2159fn score_node(
2160    context: &FindQueryContext<'_>,
2161    node: &Node,
2162    query: &str,
2163    pattern: &Pattern,
2164    matcher: &mut Matcher,
2165) -> Option<u32> {
2166    let mut primary_score = 0;
2167    let mut primary_hits = 0;
2168
2169    let id_score = score_primary_field(query, pattern, matcher, &node.id, 5);
2170    if id_score > 0 {
2171        primary_hits += 1;
2172    }
2173    primary_score += id_score;
2174
2175    let name_score = score_primary_field(query, pattern, matcher, &node.name, 4);
2176    if name_score > 0 {
2177        primary_hits += 1;
2178    }
2179    primary_score += name_score;
2180
2181    for alias in &node.properties.alias {
2182        let alias_score = score_primary_field(query, pattern, matcher, alias, 4);
2183        if alias_score > 0 {
2184            primary_hits += 1;
2185        }
2186        primary_score += alias_score;
2187    }
2188
2189    let mut contextual_score = score_secondary_field(
2190        query,
2191        pattern,
2192        matcher,
2193        &node.properties.description,
2194        FUZZY_DESCRIPTION_WEIGHT,
2195    );
2196    let mut facts_score = 0;
2197    for fact in &node.properties.key_facts {
2198        facts_score += score_secondary_field(query, pattern, matcher, fact, FUZZY_FACT_WEIGHT);
2199    }
2200    let facts_factor = fact_volume_normalizer(node);
2201    contextual_score += ((facts_score as f64) * facts_factor).round() as u32;
2202    contextual_score += score_notes_context(context, node, query, pattern, matcher);
2203
2204    let neighbor_context = score_neighbor_context(context, node, query, pattern, matcher)
2205        .min(FUZZY_NEIGHBOR_CONTEXT_CAP);
2206    contextual_score += neighbor_context / FUZZY_NEIGHBOR_CONTEXT_DIVISOR;
2207
2208    if primary_hits == 0 {
2209        contextual_score /= FUZZY_NO_PRIMARY_CONTEXT_DIVISOR;
2210    }
2211
2212    let total = primary_score + contextual_score;
2213    (total > 0).then_some(total)
2214}
2215
2216fn score_notes_context(
2217    context: &FindQueryContext<'_>,
2218    node: &Node,
2219    query: &str,
2220    pattern: &Pattern,
2221    matcher: &mut Matcher,
2222) -> u32 {
2223    let mut total = 0;
2224    for note in context.notes_for(&node.id) {
2225        total += score_secondary_field(query, pattern, matcher, &note.body, FUZZY_NOTE_BODY_WEIGHT);
2226        for tag in &note.tags {
2227            total += score_secondary_field(query, pattern, matcher, tag, FUZZY_NOTE_TAG_WEIGHT);
2228        }
2229    }
2230    total
2231}
2232
2233fn score_neighbor_context(
2234    context: &FindQueryContext<'_>,
2235    node: &Node,
2236    query: &str,
2237    pattern: &Pattern,
2238    matcher: &mut Matcher,
2239) -> u32 {
2240    let mut best = 0;
2241
2242    for neighbor in context.neighbors_for(&node.id) {
2243        let mut score = score_secondary_field(query, pattern, matcher, &neighbor.id, 1)
2244            + score_secondary_field(query, pattern, matcher, &neighbor.name, 1)
2245            + score_secondary_field(query, pattern, matcher, &neighbor.properties.description, 1);
2246
2247        for alias in &neighbor.properties.alias {
2248            score += score_secondary_field(query, pattern, matcher, alias, 1);
2249        }
2250
2251        best = best.max(score);
2252    }
2253
2254    best
2255}
2256
2257fn score_field(pattern: &Pattern, matcher: &mut Matcher, value: &str) -> Option<u32> {
2258    if value.is_empty() {
2259        return None;
2260    }
2261    let mut buf = Vec::new();
2262    let haystack = Utf32Str::new(value, &mut buf);
2263    pattern.score(haystack, matcher)
2264}
2265
2266fn score_primary_field(
2267    query: &str,
2268    pattern: &Pattern,
2269    matcher: &mut Matcher,
2270    value: &str,
2271    weight: u32,
2272) -> u32 {
2273    let bonus = textual_bonus(query, value);
2274    let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
2275    if bonus == 0 && fuzzy == 0 {
2276        return 0;
2277    }
2278    (fuzzy + bonus) * weight
2279}
2280
2281fn score_secondary_field(
2282    query: &str,
2283    pattern: &Pattern,
2284    matcher: &mut Matcher,
2285    value: &str,
2286    weight: u32,
2287) -> u32 {
2288    let bonus = textual_bonus(query, value);
2289    let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
2290    if bonus == 0 && fuzzy == 0 {
2291        return 0;
2292    }
2293    (fuzzy + bonus / 2) * weight
2294}
2295
2296fn textual_bonus(query: &str, value: &str) -> u32 {
2297    let query = query.trim().to_lowercase();
2298    let value = value.to_lowercase();
2299
2300    if value == query {
2301        return 400;
2302    }
2303    if value.contains(&query) {
2304        return 200;
2305    }
2306
2307    query
2308        .split_whitespace()
2309        .map(|token| {
2310            if value.contains(token) {
2311                80
2312            } else if is_subsequence(token, &value) {
2313                40
2314            } else {
2315                0
2316            }
2317        })
2318        .sum()
2319}
2320
/// Reports whether the characters of `needle` appear in `haystack` in the
/// same order, not necessarily adjacent.
///
/// An empty `needle` is never considered a subsequence.
fn is_subsequence(needle: &str, haystack: &str) -> bool {
    if needle.is_empty() {
        return false;
    }

    // One shared cursor over the haystack: each needle character resumes the
    // scan where the previous one stopped.
    let mut remaining = haystack.chars();
    needle
        .chars()
        .all(|wanted| remaining.any(|ch| ch == wanted))
}
2343
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Concept` node with the given identity, text fields and
    /// ranking metadata; every other property keeps its default value.
    fn make_node(
        id: &str,
        name: &str,
        description: &str,
        key_facts: &[&str],
        alias: &[&str],
        importance: f64,
        feedback_score: f64,
        feedback_count: u64,
    ) -> Node {
        let mut properties = crate::graph::NodeProperties::default();
        properties.description = String::from(description);
        properties.key_facts = key_facts.iter().map(|fact| String::from(*fact)).collect();
        properties.alias = alias.iter().map(|name| String::from(*name)).collect();
        properties.importance = importance;
        properties.feedback_score = feedback_score;
        properties.feedback_count = feedback_count;

        Node {
            id: String::from(id),
            r#type: String::from("Concept"),
            name: String::from(name),
            properties,
            source_files: Vec::new(),
        }
    }

    /// Builds an edge `source_id -[relation]-> target_id` with default properties.
    fn make_edge(source_id: &str, relation: &str, target_id: &str) -> Edge {
        Edge {
            source_id: String::from(source_id),
            relation: String::from(relation),
            target_id: String::from(target_id),
            properties: crate::graph::EdgeProperties::default(),
        }
    }

    /// Looks up the score of the result whose node id is `id`; panics if absent.
    fn score_for(results: &[ScoredNode<'_>], id: &str) -> i64 {
        results
            .iter()
            .find_map(|item| (item.node.id == id).then_some(item.score))
            .expect("score for node")
    }

    /// Pins the bonus tiers: exact, substring, per-token and subsequence.
    #[test]
    fn textual_bonus_tiers_are_stable() {
        let cases = [
            ("abc", "abc", 400),
            ("abc", "xxabcxx", 200),
            ("abc def", "aa abc and def zz", 160),
            ("abc", "aXbYc", 40),
            ("abc", "zzz", 0),
        ];
        for (query, value, expected) in cases {
            assert_eq!(textual_bonus(query, value), expected);
        }
    }

    /// Tokenization lower-cases non-ASCII letters and splits on the hyphen.
    #[test]
    fn tokenize_handles_unicode_casefolding() {
        assert_eq!(tokenize("ŁÓDŹ smart-home"), ["łódź", "smart", "home"]);
    }

    /// A contiguous phrase earns the phrase boost, scattered tokens the token
    /// boost, and unrelated text nothing.
    #[test]
    fn bm25_lexical_boost_prefers_phrase_then_tokens() {
        let query_terms = tokenize("smart home api");
        let uniform_idf = |_: &str| 1.0;
        let cases = [
            ("x smart home api y", 120),
            ("smart x api y home", 45),
            ("nothing here", 0),
        ];
        for (document, expected) in cases {
            let document_terms = tokenize(document);
            assert_eq!(
                bm25_lexical_boost_with_idf(&query_terms, &document_terms, uniform_idf),
                expected
            );
        }
    }

    /// Key facts and notes alone are enough for a positive fuzzy score, while
    /// a node without any matching text scores `None`.
    #[test]
    fn score_node_uses_key_facts_and_notes_without_primary_match() {
        let query = "oauth2 producenta";
        let gateway = make_node(
            "concept:gateway",
            "Gateway",
            "",
            &["Autentykacja OAuth2 przez konto producenta"],
            &[],
            0.5,
            0.0,
            0,
        );
        let mut graph = GraphFile::new("test");
        graph.nodes.push(gateway.clone());
        graph.notes.push(crate::graph::Note {
            id: "note:oauth".to_owned(),
            node_id: gateway.id.clone(),
            body: "Token refresh przez OAuth2".to_owned(),
            tags: vec!["oauth2".to_owned()],
            ..Default::default()
        });

        let pattern = Pattern::parse(
            "oauth2 producenta",
            CaseMatching::Ignore,
            Normalization::Smart,
        );
        let context = FindQueryContext::build(&graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let with_context = score_node(&context, &gateway, query, &pattern, &mut matcher);
        assert!(with_context.is_some_and(|value| value > 0));

        let bare_graph = GraphFile::new("empty");
        let bare_node = make_node("concept:gateway", "Gateway", "", &[], &[], 0.5, 0.0, 0);
        let bare_context = FindQueryContext::build(&bare_graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let without_context =
            score_node(&bare_context, &bare_node, query, &pattern, &mut matcher);
        assert!(without_context.is_none());
    }

    /// With identical text, the node with the higher importance ranks higher.
    #[test]
    fn score_bm25_respects_importance_boost_for_equal_documents() {
        let mut graph = GraphFile::new("test");
        graph.nodes.extend([
            make_node(
                "concept:high",
                "High",
                "smart home api",
                &[],
                &[],
                1.0,
                0.0,
                0,
            ),
            make_node(
                "concept:low",
                "Low",
                "smart home api",
                &[],
                &[],
                0.0,
                0.0,
                0,
            ),
        ]);

        let results = find_all_matches_with_index(
            &graph,
            "smart home api",
            true,
            false,
            FindMode::Bm25,
            None,
            None,
        );
        assert!(score_for(&results, "concept:high") > score_for(&results, "concept:low"));
    }

    /// A node matching the query itself outranks a node that only matches
    /// through one of its neighbors.
    #[test]
    fn bm25_prefers_self_match_over_neighbor_only_match() {
        let mut graph = GraphFile::new("test");
        graph.nodes.extend([
            make_node(
                "concept:self_hit",
                "Batch plugin output directory",
                "",
                &["BatchPlugin OUTPUT_DIR rule in WebLogic path"],
                &[],
                0.5,
                0.0,
                0,
            ),
            make_node(
                "concept:hub",
                "Integration Hub",
                "gateway for many systems",
                &[],
                &[],
                0.5,
                0.0,
                0,
            ),
            make_node(
                "concept:neighbor_hit",
                "BatchPlugin OUTPUT_DIR in WebLogic",
                "",
                &[],
                &[],
                0.5,
                0.0,
                0,
            ),
        ]);
        graph
            .edges
            .push(make_edge("concept:hub", "HAS", "concept:neighbor_hit"));

        let results = find_all_matches_with_index(
            &graph,
            "BatchPlugin OUTPUT_DIR WebLogic",
            true,
            false,
            FindMode::Bm25,
            None,
            None,
        );

        assert!(results.iter().any(|item| item.node.id == "concept:hub"));
        let self_hit_score = score_for(&results, "concept:self_hit");
        let hub_score = score_for(&results, "concept:hub");
        assert!(self_hit_score > hub_score);
    }

    /// The edge leading to the query-relevant neighbor is rendered first, even
    /// though it was added last and has the lowest importance.
    #[test]
    fn link_rendering_sorts_incident_edges_by_query_relevance() {
        let mut graph = GraphFile::new("test");
        graph.nodes.extend([
            make_node("concept:center", "Center", "", &[], &[], 0.5, 0.0, 0),
            make_node(
                "concept:relevant",
                "Push notification template",
                "",
                &[],
                &[],
                0.2,
                0.0,
                0,
            ),
            make_node(
                "concept:irrelevant_a",
                "Billing ledger",
                "",
                &[],
                &[],
                0.9,
                0.0,
                0,
            ),
            make_node(
                "concept:irrelevant_b",
                "Audit trail",
                "",
                &[],
                &[],
                0.8,
                0.0,
                0,
            ),
        ]);
        graph.edges.extend([
            make_edge("concept:center", "HAS", "concept:irrelevant_a"),
            make_edge("concept:center", "HAS", "concept:irrelevant_b"),
            make_edge("concept:center", "HAS", "concept:relevant"),
        ]);

        let center = graph.node_by_id("concept:center").expect("center node");
        let lines = render_node_link_lines(&graph, center, 2, Some("push notification template"));

        let leading_edge = lines
            .iter()
            .find(|line| line.starts_with("-> "))
            .expect("first edge line");
        assert!(leading_edge.contains("concept:relevant"));
    }

    /// For a weakly relevant candidate the applied authority boost equals the
    /// cap, which is below the raw authority value.
    #[test]
    fn final_score_caps_authority_boost_for_weak_relevance() {
        let weak = make_node(
            "concept:weak",
            "Weak",
            "smart home api",
            &[],
            &[],
            1.0,
            300.0,
            1,
        );
        let strong = make_node(
            "concept:strong",
            "Strong",
            "smart home api smart home api smart home api smart home api",
            &[],
            &[],
            0.5,
            0.0,
            0,
        );

        let scored = compose_scores(vec![
            RawCandidate {
                node: &weak,
                raw_relevance: 12.0,
                lexical_boost: 0,
            },
            RawCandidate {
                node: &strong,
                raw_relevance: 100.0,
                lexical_boost: 0,
            },
        ]);

        let weak_breakdown = &scored
            .iter()
            .find(|item| item.node.id == "concept:weak")
            .expect("weak node")
            .breakdown;
        assert_eq!(
            weak_breakdown.authority_applied,
            weak_breakdown.authority_cap
        );
        assert!(weak_breakdown.authority_raw > weak_breakdown.authority_cap);
    }

    /// Pins the numeric ranges of the importance and feedback boosts.
    #[test]
    fn importance_and_feedback_boost_have_expected_ranges() {
        let most_important = make_node("concept:high", "High", "", &[], &[], 1.0, 0.0, 0);
        let least_important = make_node("concept:low", "Low", "", &[], &[], 0.0, 0.0, 0);
        assert_eq!(importance_boost(&most_important), 66);
        assert_eq!(importance_boost(&least_important), -66);

        let liked = make_node("concept:pos", "Pos", "", &[], &[], 0.5, 1.0, 1);
        let disliked = make_node("concept:neg", "Neg", "", &[], &[], 0.5, -2.0, 1);
        let saturated = make_node("concept:sat", "Sat", "", &[], &[], 0.5, 300.0, 1);
        assert_eq!(feedback_boost(&liked), 46);
        assert_eq!(feedback_boost(&disliked), -92);
        assert_eq!(feedback_boost(&saturated), 300);
    }

    /// Two records sharing one node id produce a single search hit.
    #[test]
    fn find_deduplicates_results_by_node_id_for_single_query() {
        let mut graph = GraphFile::new("test");
        graph.nodes.extend([
            make_node(
                "concept:rule",
                "Business Rule",
                "Rule for billing decisions",
                &["Business rule validation"],
                &["billing rule"],
                0.5,
                0.0,
                0,
            ),
            make_node(
                "concept:rule",
                "Business Rule Duplicate",
                "Duplicate record with same id",
                &["Business rule duplicate"],
                &[],
                0.5,
                0.0,
                0,
            ),
        ]);

        let results = find_all_matches_with_index(
            &graph,
            "business rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            None,
        );
        let hits_for_rule = results
            .iter()
            .filter(|item| item.node.id == "concept:rule")
            .count();
        assert_eq!(hits_for_rule, 1);
    }

    /// Changing only the vector weight of the tuning leaves the hybrid score
    /// unchanged.
    #[test]
    fn hybrid_score_does_not_change_when_only_vector_weight_changes() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:auth",
            "Authentication Rule",
            "Business rule for authentication",
            &["auth rule"],
            &["login policy"],
            0.5,
            0.0,
            0,
        ));

        let vector_on = FindTune {
            bm25: 0.55,
            fuzzy: 0.35,
            vector: 1.0,
        };
        let vector_off = FindTune {
            bm25: 0.55,
            fuzzy: 0.35,
            vector: 0.0,
        };

        let with_vector = find_all_matches_with_index(
            &graph,
            "authentication rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            Some(&vector_on),
        );
        let no_vector = find_all_matches_with_index(
            &graph,
            "authentication rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            Some(&vector_off),
        );

        assert_eq!(with_vector.len(), 1);
        assert_eq!(no_vector.len(), 1);
        assert_eq!(with_vector[0].score, no_vector[0].score);
    }

    /// Nodes of type `^` are returned only when metadata is explicitly included.
    #[test]
    fn find_hides_metadata_nodes_unless_enabled() {
        let mut metadata_node = make_node(
            "^:graph_info",
            "Graph Metadata",
            "Internal metadata",
            &["graph_uuid=abc123"],
            &[],
            0.5,
            0.0,
            0,
        );
        metadata_node.r#type = "^".to_owned();

        let mut graph = GraphFile::new("test");
        graph.nodes.push(metadata_node);

        let hidden = find_all_matches_with_index(
            &graph,
            "graph uuid",
            true,
            false,
            FindMode::Hybrid,
            None,
            None,
        );
        assert!(hidden.is_empty());

        let shown = find_all_matches_with_index(
            &graph,
            "graph uuid",
            true,
            true,
            FindMode::Hybrid,
            None,
            None,
        );
        assert_eq!(shown.len(), 1);
        assert_eq!(shown[0].node.id, "^:graph_info");
    }
}
kg/output.rs

kg/
output.rs