Skip to main content

gitcortex_mcp/mcp/
tour.rs

1//! Guided-tour generation — deterministic graph traversal that picks the
2//! "important" symbols of a repo (or of a seeded subgraph) and orders them so
3//! a reader can walk through the codebase top-down.
4//!
5//! Algorithm (pure graph, no LLM):
6//! 1. Build adjacency from `Contains` + `Calls` edges.
7//! 2. Score each node by in-degree across `Calls` (centrality).
8//! 3. Pick top-K nodes globally, or BFS from a seed when one is provided.
9//! 4. Return ordered tour steps with rationale per step.
10
11use std::collections::{HashMap, HashSet, VecDeque};
12
13use gitcortex_core::{
14    error::Result,
15    graph::Node,
16    schema::{EdgeKind, NodeKind},
17    store::GraphStore,
18};
19use serde::Serialize;
20
21/// One step in a generated tour.
22#[derive(Debug, Clone, Serialize)]
23pub struct TourStep {
24    pub order: u32,
25    pub name: String,
26    pub qualified_name: String,
27    pub kind: String,
28    pub file: String,
29    pub start_line: u32,
30    /// Why this step appears here — e.g. "high in-degree (12 callers)" or
31    /// "entry point: public function" or "called by previous step".
32    pub reason: String,
33}
34
35/// A component (directory/module) in the architecture summary.
36#[derive(Debug, Clone, Serialize)]
37pub struct Component {
38    /// Directory path that groups the component's files, e.g. `src/parser`.
39    pub path: String,
40    /// Number of distinct source files in the component.
41    pub files: u32,
42    /// Highest-centrality public symbols in the component (up to 4).
43    pub key_symbols: Vec<String>,
44    /// Other components this one calls into / uses / imports from.
45    pub depends_on: Vec<String>,
46}
47
48#[derive(Debug, Clone, Serialize)]
49pub struct Tour {
50    pub seed: Option<String>,
51    pub branch: String,
52    pub steps: Vec<TourStep>,
53    /// Component-level architecture summary (no-seed tours only). Answers
54    /// "what are the main components and how do they fit together" in one call.
55    pub components: Vec<Component>,
56}
57
58/// Default tour length when caller doesn't specify.
59const DEFAULT_TOUR_LEN: usize = 12;
60/// Hard cap to keep tour outputs bounded.
61const MAX_TOUR_LEN: usize = 50;
62/// No-seed tours lead with the component map; the global central-symbol list is
63/// kept short so the result is a compact, self-contained overview.
64const NO_SEED_STEP_CAP: usize = 5;
65
66/// Generate a tour for `branch`. If `seed` is `Some`, the tour is rooted at
67/// that symbol and walks outward via `Calls` and `Contains` edges. If `None`,
68/// the tour picks the highest-centrality public entry points across the repo.
69pub fn generate<S: GraphStore + ?Sized>(
70    store: &S,
71    branch: &str,
72    seed: Option<&str>,
73    limit: Option<usize>,
74) -> Result<Tour> {
75    let limit = limit.unwrap_or(DEFAULT_TOUR_LEN).min(MAX_TOUR_LEN);
76    let nodes = store.list_all_nodes(branch)?;
77    let edges = store.list_all_edges(branch)?;
78
79    let mut in_degree: HashMap<String, u32> = HashMap::new();
80    let mut callees_of: HashMap<String, Vec<String>> = HashMap::new();
81    for e in &edges {
82        if matches!(e.kind, EdgeKind::Calls) {
83            *in_degree.entry(e.dst.as_str()).or_insert(0) += 1;
84            callees_of
85                .entry(e.src.as_str())
86                .or_default()
87                .push(e.dst.as_str());
88        }
89    }
90
91    // Cross-component dependency edges (Calls/Uses/Imports), kept for the
92    // architecture summary so we can show how components fit together.
93    let mut dep_edges: Vec<(String, String)> = Vec::new();
94    for e in &edges {
95        if matches!(e.kind, EdgeKind::Calls | EdgeKind::Uses | EdgeKind::Imports) {
96            dep_edges.push((e.src.as_str(), e.dst.as_str()));
97        }
98    }
99
100    let by_id: HashMap<String, Node> = nodes.into_iter().map(|n| (n.id.as_str(), n)).collect();
101
102    let (steps, components) = match seed {
103        Some(name) => (
104            seeded_tour(&by_id, &callees_of, &in_degree, name, limit),
105            Vec::new(),
106        ),
107        None => (
108            // No-seed: the component map is the answer. Keep only a short
109            // "most central overall" list (top 5) so the payload stays small —
110            // per-component key symbols already cover the important ones.
111            global_tour(&by_id, &in_degree, limit.min(NO_SEED_STEP_CAP)),
112            architecture_summary(&by_id, &in_degree, &dep_edges, limit),
113        ),
114    };
115
116    Ok(Tour {
117        seed: seed.map(str::to_owned),
118        branch: branch.to_owned(),
119        steps,
120        components,
121    })
122}
123
124/// Derive a component label from a file path: the parent directory, or the
125/// stem when the file is at the repo root.
126fn component_of(file: &str) -> String {
127    match file.rfind('/') {
128        Some(i) => file[..i].to_owned(),
129        None => file.to_owned(),
130    }
131}
132
133/// Group symbols into components (directories) and summarise each: file count,
134/// top central symbols, and the other components it depends on. Components are
135/// ranked by aggregate centrality so the most important appear first.
136fn architecture_summary(
137    by_id: &HashMap<String, Node>,
138    in_degree: &HashMap<String, u32>,
139    dep_edges: &[(String, String)],
140    limit: usize,
141) -> Vec<Component> {
142    // id → component, for edge resolution.
143    let comp_of_id: HashMap<&str, String> = by_id
144        .iter()
145        .map(|(id, n)| (id.as_str(), component_of(&n.file.display().to_string())))
146        .collect();
147
148    // Per-component aggregates.
149    let mut files: HashMap<String, HashSet<String>> = HashMap::new();
150    let mut score: HashMap<String, u32> = HashMap::new();
151    // (symbol_name "name — file:line", degree) for picking key symbols. The
152    // location is embedded so a tour answer needs no follow-up lookups.
153    let mut symbols: HashMap<String, Vec<(String, u32)>> = HashMap::new();
154    for n in by_id.values() {
155        let file = n.file.display().to_string();
156        let comp = component_of(&file);
157        files.entry(comp.clone()).or_default().insert(file.clone());
158        let deg = in_degree.get(&n.id.as_str()).copied().unwrap_or(0);
159        *score.entry(comp.clone()).or_insert(0) += deg;
160        if matches!(
161            n.kind,
162            NodeKind::Function
163                | NodeKind::Method
164                | NodeKind::Struct
165                | NodeKind::Trait
166                | NodeKind::Interface
167        ) {
168            let label = format!("{} — {}:{}", n.name, file, n.span.start_line);
169            symbols.entry(comp).or_default().push((label, deg));
170        }
171    }
172
173    // Cross-component dependencies.
174    let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
175    for (src, dst) in dep_edges {
176        if let (Some(sc), Some(dc)) = (comp_of_id.get(src.as_str()), comp_of_id.get(dst.as_str())) {
177            if sc != dc {
178                deps.entry(sc.clone()).or_default().insert(dc.clone());
179            }
180        }
181    }
182
183    let mut ranked: Vec<(String, u32)> = score.into_iter().collect();
184    ranked.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
185
186    ranked
187        .into_iter()
188        .take(limit)
189        .map(|(comp, _)| {
190            let mut key = symbols.remove(&comp).unwrap_or_default();
191            key.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
192            key.dedup_by(|a, b| a.0 == b.0);
193            let key_symbols: Vec<String> = key.into_iter().take(4).map(|(name, _)| name).collect();
194            let mut depends_on: Vec<String> = deps
195                .get(&comp)
196                .map(|s| s.iter().cloned().collect())
197                .unwrap_or_default();
198            depends_on.sort();
199            Component {
200                files: files.get(&comp).map(|f| f.len() as u32).unwrap_or(0),
201                path: comp,
202                key_symbols,
203                depends_on,
204            }
205        })
206        .collect()
207}
208
209/// Pick highest-centrality public functions/methods across the repo.
210fn global_tour(
211    by_id: &HashMap<String, Node>,
212    in_degree: &HashMap<String, u32>,
213    limit: usize,
214) -> Vec<TourStep> {
215    let mut scored: Vec<(&Node, u32)> = by_id
216        .values()
217        .filter(|n| {
218            matches!(
219                n.kind,
220                NodeKind::Function
221                    | NodeKind::Method
222                    | NodeKind::Struct
223                    | NodeKind::Trait
224                    | NodeKind::Interface
225            )
226        })
227        .map(|n| {
228            let deg = in_degree.get(&n.id.as_str()).copied().unwrap_or(0);
229            (n, deg)
230        })
231        .collect();
232    // Sort: higher degree first, then alphabetic by qualified_name for determinism.
233    scored.sort_by(|a, b| {
234        b.1.cmp(&a.1)
235            .then_with(|| a.0.qualified_name.cmp(&b.0.qualified_name))
236    });
237
238    scored
239        .into_iter()
240        .take(limit)
241        .enumerate()
242        .map(|(i, (n, deg))| TourStep {
243            order: (i + 1) as u32,
244            name: n.name.clone(),
245            qualified_name: n.qualified_name.clone(),
246            kind: n.kind.to_string(),
247            file: n.file.display().to_string(),
248            start_line: n.span.start_line,
249            reason: if deg == 0 {
250                "public surface (no inbound calls)".into()
251            } else {
252                format!("central — {deg} inbound calls")
253            },
254        })
255        .collect()
256}
257
258/// BFS from `seed_name` along `Calls`, preserving discovery order.
259fn seeded_tour(
260    by_id: &HashMap<String, Node>,
261    callees_of: &HashMap<String, Vec<String>>,
262    in_degree: &HashMap<String, u32>,
263    seed_name: &str,
264    limit: usize,
265) -> Vec<TourStep> {
266    // Find a seed node by unqualified name; pick the highest-centrality one
267    // when multiple match (matches user intent — "tour main" picks the
268    // central main).
269    let seed_node = by_id
270        .values()
271        .filter(|n| n.name == seed_name)
272        .max_by_key(|n| in_degree.get(&n.id.as_str()).copied().unwrap_or(0));
273    let Some(seed) = seed_node else {
274        return Vec::new();
275    };
276
277    let mut visited: HashSet<String> = HashSet::new();
278    let mut queue: VecDeque<(String, u32)> = VecDeque::new();
279    queue.push_back((seed.id.as_str(), 0));
280    visited.insert(seed.id.as_str());
281
282    let mut steps: Vec<TourStep> = Vec::new();
283    while let Some((id, hop)) = queue.pop_front() {
284        if steps.len() >= limit {
285            break;
286        }
287        let Some(n) = by_id.get(&id) else { continue };
288        let reason = if hop == 0 {
289            "seed".into()
290        } else if hop == 1 {
291            "directly called by seed".into()
292        } else {
293            format!("{hop} hops from seed")
294        };
295        steps.push(TourStep {
296            order: (steps.len() + 1) as u32,
297            name: n.name.clone(),
298            qualified_name: n.qualified_name.clone(),
299            kind: n.kind.to_string(),
300            file: n.file.display().to_string(),
301            start_line: n.span.start_line,
302            reason,
303        });
304        if let Some(next) = callees_of.get(&id) {
305            for callee_id in next {
306                if visited.insert(callee_id.clone()) {
307                    queue.push_back((callee_id.clone(), hop + 1));
308                }
309            }
310        }
311    }
312
313    steps
314}
315
316/// Render a tour as a human-readable markdown plan.
317pub fn render_markdown(tour: &Tour) -> String {
318    use std::fmt::Write;
319    let mut out = String::with_capacity(512);
320
321    // No-seed tours ARE the component-level architecture map — a compact,
322    // self-contained answer to "what are the main components and how do they
323    // fit together". A short "most central" list follows; we deliberately do
324    // not dump a long step list, keeping the result token-cheap.
325    if tour.seed.is_none() && !tour.components.is_empty() {
326        let _ = writeln!(out, "# Architecture (branch={})", tour.branch);
327        let _ = writeln!(
328            out,
329            "\n## Components ({} shown, ranked by centrality)\n",
330            tour.components.len()
331        );
332        for c in &tour.components {
333            let _ = writeln!(out, "### `{}` ({} files)", c.path, c.files);
334            if !c.key_symbols.is_empty() {
335                let keys = c
336                    .key_symbols
337                    .iter()
338                    .map(|s| format!("`{s}`"))
339                    .collect::<Vec<_>>()
340                    .join(", ");
341                let _ = writeln!(out, "- key: {keys}");
342            }
343            if !c.depends_on.is_empty() {
344                let _ = writeln!(out, "- depends on: {}", c.depends_on.join(", "));
345            }
346        }
347        if !tour.steps.is_empty() {
348            let names = tour
349                .steps
350                .iter()
351                .map(|s| format!("`{}`", s.name))
352                .collect::<Vec<_>>()
353                .join(", ");
354            let _ = writeln!(out, "\n## Most central overall\n{names}");
355        }
356        return out;
357    }
358
359    let _ = writeln!(
360        out,
361        "# Tour ({} steps, branch={})",
362        tour.steps.len(),
363        tour.branch
364    );
365    if let Some(seed) = &tour.seed {
366        let _ = writeln!(out, "Seed: `{seed}`");
367    }
368    let _ = writeln!(out);
369    for s in &tour.steps {
370        let _ = writeln!(
371            out,
372            "{}. `{}` ({})  — `{}:{}`  _{}_",
373            s.order, s.name, s.kind, s.file, s.start_line, s.reason
374        );
375    }
376    out
377}