Skip to main content

reflex/pulse/
onboard.rs

1//! Onboard Guide: "Getting Started" page for developer onboarding
2//!
3//! Identifies entry points (main files, CLI handlers, API routes),
4//! suggests a reading order via dependency topology, and provides
5//! structural context for LLM narration.
6
7use anyhow::{Context, Result};
8use rusqlite::Connection;
9use rusqlite::OptionalExtension;
10use std::collections::{HashMap, HashSet, VecDeque};
11use std::path::Path;
12
13use crate::cache::CacheManager;
14use crate::models::{SearchResult, SymbolKind};
15
/// Kind of entry point detected.
///
/// The kind is inferred purely from a file's name (see `detect_entry_points`).
/// The enum is a plain tag with no payload, so it is `Copy` and can be passed
/// by value freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntryPointKind {
    /// A `main.*` or `cli.*` source file.
    CliBinary,
    /// A `server.*`, `app.*` or `routes.*` source file.
    HttpServer,
    /// A shallow `lib.rs`, `mod.rs`, `index.ts`/`index.js`, `__init__.py` or `mod.go`.
    Library,
    /// A task-runner file: `Makefile`, `Rakefile`, `Taskfile.yml` or `justfile`.
    Script,
    /// A test-runner configuration file such as `conftest.py` or `jest.config.js`.
    TestRunner,
}
25
26impl std::fmt::Display for EntryPointKind {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match self {
29            EntryPointKind::CliBinary => write!(f, "CLI Binary"),
30            EntryPointKind::HttpServer => write!(f, "HTTP Server"),
31            EntryPointKind::Library => write!(f, "Library"),
32            EntryPointKind::Script => write!(f, "Script"),
33            EntryPointKind::TestRunner => write!(f, "Test Runner"),
34        }
35    }
36}
37
/// A detected entry point in the codebase
///
/// Produced by `detect_entry_points`, one per matching file.
#[derive(Debug, Clone)]
pub struct EntryPoint {
    /// File path exactly as stored in the `files.path` column of `meta.db`.
    pub path: String,
    /// Category inferred from the file name.
    pub kind: EntryPointKind,
    /// Up to 8 symbol names (functions, structs, classes, traits, interfaces)
    /// found in the file; always empty for `Script` and `TestRunner` entries.
    pub key_symbols: Vec<String>,
}
45
/// A layer in the reading order (BFS from entry points)
#[derive(Debug, Clone)]
pub struct ReadingLayer {
    /// BFS distance from the entry points; 0 is the entry points themselves.
    /// Also used as the node id (`L<depth>`) in the rendered Mermaid flowchart.
    pub depth: usize,
    /// Human-readable name of the layer, e.g. "Entry Points".
    pub label: String,
    /// Paths of the files first reached at this depth.
    pub files: Vec<String>,
}
53
/// Complete reading order computed via BFS from entry points
#[derive(Debug, Clone)]
pub struct ReadingOrder {
    /// Layers to read, in order. `compute_reading_order` emits only non-empty
    /// layers, sorted by ascending depth.
    pub layers: Vec<ReadingLayer>,
}
59
/// Full onboard data
///
/// Everything needed to render the "Getting Started" page or to build the
/// context string handed to the LLM.
#[derive(Debug, Clone)]
pub struct OnboardData {
    /// Detected entry points.
    pub entry_points: Vec<EntryPoint>,
    /// Suggested reading order derived from the dependency graph.
    pub reading_order: ReadingOrder,
    /// Headline numbers about the project.
    pub project_stats: ProjectStats,
    /// Optional narration text; `generate_onboard_structural` leaves it `None`.
    /// When present it is emitted verbatim at the top of the rendered markdown.
    pub narration: Option<String>,
}
68
/// Quick stats for the onboard page
#[derive(Debug, Clone)]
pub struct ProjectStats {
    /// Number of rows in the `files` table.
    pub total_files: usize,
    /// Sum of `line_count` over all files (0 when there are none).
    pub total_lines: usize,
    /// `(language, file count)` pairs, most files first, at most 10 entries;
    /// files without a language are reported as "other".
    pub languages: Vec<(String, usize)>,
    /// Module count supplied by the caller of `gather_project_stats`.
    pub module_count: usize,
}
77
78/// Detect entry points by matching well-known file patterns and names
79pub fn detect_entry_points(cache: &CacheManager) -> Result<Vec<EntryPoint>> {
80    let db_path = cache.path().join("meta.db");
81    let conn = Connection::open(&db_path)
82        .context("Failed to open meta.db")?;
83
84    // Get all file paths
85    let mut stmt = conn.prepare("SELECT path FROM files ORDER BY path")?;
86    let paths: Vec<String> = stmt.query_map([], |row| row.get(0))?
87        .filter_map(|r| r.ok())
88        .collect();
89
90    let mut entry_points = Vec::new();
91    let mut seen_paths = HashSet::new();
92
93    for path in &paths {
94        let filename = Path::new(path).file_name()
95            .and_then(|f| f.to_str())
96            .unwrap_or("");
97        let lower = filename.to_lowercase();
98
99        // CLI binary entry points
100        if matches!(filename, "main.rs" | "main.go" | "main.py" | "main.c" | "main.cpp" | "main.zig")
101            || (filename == "cli.rs" || filename == "cli.ts" || filename == "cli.py" || filename == "cli.js")
102        {
103            if seen_paths.insert(path.clone()) {
104                let kind = EntryPointKind::CliBinary;
105                let symbols = extract_key_symbols_for_entry(&conn, path);
106                entry_points.push(EntryPoint { path: path.clone(), kind, key_symbols: symbols });
107            }
108            continue;
109        }
110
111        // HTTP server entry points
112        if matches!(filename, "server.rs" | "server.ts" | "server.js" | "server.py" | "server.go"
113            | "app.rs" | "app.ts" | "app.js" | "app.py" | "app.go"
114            | "routes.rs" | "routes.ts" | "routes.js" | "routes.py")
115        {
116            if seen_paths.insert(path.clone()) {
117                let symbols = extract_key_symbols_for_entry(&conn, path);
118                entry_points.push(EntryPoint { path: path.clone(), kind: EntryPointKind::HttpServer, key_symbols: symbols });
119            }
120            continue;
121        }
122
123        // Library entry points
124        if matches!(filename, "lib.rs" | "mod.rs" | "index.ts" | "index.js" | "__init__.py" | "mod.go") {
125            // Only include top-level or shallow lib/index files, not deeply nested ones
126            let depth = path.matches('/').count();
127            if depth <= 2 && seen_paths.insert(path.clone()) {
128                let symbols = extract_key_symbols_for_entry(&conn, path);
129                entry_points.push(EntryPoint { path: path.clone(), kind: EntryPointKind::Library, key_symbols: symbols });
130            }
131            continue;
132        }
133
134        // Script entry points (package.json scripts, Makefile, etc.)
135        if matches!(filename, "Makefile" | "Rakefile" | "Taskfile.yml" | "justfile") {
136            if seen_paths.insert(path.clone()) {
137                entry_points.push(EntryPoint { path: path.clone(), kind: EntryPointKind::Script, key_symbols: vec![] });
138            }
139            continue;
140        }
141
142        // Test runners
143        if matches!(lower.as_str(), "conftest.py" | "jest.config.js" | "jest.config.ts"
144            | "vitest.config.ts" | "vitest.config.js" | "pytest.ini" | "setup.cfg")
145            && path.matches('/').count() <= 1
146        {
147            if seen_paths.insert(path.clone()) {
148                entry_points.push(EntryPoint { path: path.clone(), kind: EntryPointKind::TestRunner, key_symbols: vec![] });
149            }
150        }
151    }
152
153    // Sort: CLI first, then HTTP, then Library, then others
154    entry_points.sort_by_key(|ep| match ep.kind {
155        EntryPointKind::CliBinary => 0,
156        EntryPointKind::HttpServer => 1,
157        EntryPointKind::Library => 2,
158        EntryPointKind::Script => 3,
159        EntryPointKind::TestRunner => 4,
160    });
161
162    Ok(entry_points)
163}
164
165/// Extract key symbol names for an entry point file from the symbol cache.
166///
167/// Queries the `symbols` table which stores all symbols for a file as a
168/// serialized JSON blob (`symbols_json` column containing `Vec<SearchResult>`).
169fn extract_key_symbols_for_entry(conn: &Connection, path: &str) -> Vec<String> {
170    // Get file_id
171    let file_id: Option<i64> = conn.query_row(
172        "SELECT id FROM files WHERE path = ?1",
173        [path],
174        |row| row.get(0),
175    ).ok();
176
177    let Some(file_id) = file_id else { return vec![] };
178
179    // Query the symbols table for this file's serialized symbols
180    let symbols_json: Option<String> = conn.query_row(
181        "SELECT symbols_json FROM symbols WHERE file_id = ?1",
182        [file_id],
183        |row| row.get(0),
184    ).optional().ok().flatten();
185
186    let Some(json) = symbols_json else { return vec![] };
187
188    // Deserialize and filter to key symbol kinds
189    let symbols: Vec<SearchResult> = match serde_json::from_str(&json) {
190        Ok(s) => s,
191        Err(_) => return vec![],
192    };
193
194    symbols.iter()
195        .filter(|sr| matches!(sr.kind,
196            SymbolKind::Function | SymbolKind::Struct | SymbolKind::Class
197            | SymbolKind::Trait | SymbolKind::Interface
198        ))
199        .filter_map(|sr| sr.symbol.clone())
200        .take(8)
201        .collect()
202}
203
204/// Compute reading order via BFS from entry points through the dependency graph
205pub fn compute_reading_order(cache: &CacheManager, entry_points: &[EntryPoint]) -> Result<ReadingOrder> {
206    let db_path = cache.path().join("meta.db");
207    let conn = Connection::open(&db_path)?;
208
209    // Build adjacency list: file_id -> [dependent file_ids]
210    // We traverse in the direction entry_point -> its dependencies
211    let mut deps: HashMap<i64, Vec<i64>> = HashMap::new();
212    let mut path_to_id: HashMap<String, i64> = HashMap::new();
213    let mut id_to_path: HashMap<i64, String> = HashMap::new();
214
215    // Load file id mappings
216    let mut stmt = conn.prepare("SELECT id, path FROM files")?;
217    let rows = stmt.query_map([], |row| {
218        Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
219    })?;
220    for row in rows.flatten() {
221        path_to_id.insert(row.1.clone(), row.0);
222        id_to_path.insert(row.0, row.1);
223    }
224
225    // Load dependency edges (file -> its dependency)
226    let mut stmt = conn.prepare(
227        "SELECT file_id, resolved_file_id FROM file_dependencies WHERE resolved_file_id IS NOT NULL"
228    )?;
229    let edges = stmt.query_map([], |row| {
230        Ok((row.get::<_, i64>(0)?, row.get::<_, i64>(1)?))
231    })?;
232    for edge in edges.flatten() {
233        deps.entry(edge.0).or_default().push(edge.1);
234    }
235
236    // BFS from entry points
237    let mut visited: HashSet<i64> = HashSet::new();
238    let mut queue: VecDeque<(i64, usize)> = VecDeque::new();
239    let mut layers_map: HashMap<usize, Vec<String>> = HashMap::new();
240
241    for ep in entry_points {
242        if let Some(&file_id) = path_to_id.get(&ep.path) {
243            if visited.insert(file_id) {
244                queue.push_back((file_id, 0));
245            }
246        }
247    }
248
249    while let Some((file_id, depth)) = queue.pop_front() {
250        if depth > 5 { continue; } // Cap depth to keep reading order manageable
251
252        if let Some(path) = id_to_path.get(&file_id) {
253            layers_map.entry(depth).or_default().push(path.clone());
254        }
255
256        if let Some(dep_ids) = deps.get(&file_id) {
257            for &dep_id in dep_ids {
258                if visited.insert(dep_id) {
259                    queue.push_back((dep_id, depth + 1));
260                }
261            }
262        }
263    }
264
265    let layer_labels = [
266        "Entry Points",
267        "Direct Dependencies",
268        "Core Infrastructure",
269        "Supporting Modules",
270        "Deep Dependencies",
271        "Periphery",
272    ];
273
274    let mut layers: Vec<ReadingLayer> = Vec::new();
275    for depth in 0..=5 {
276        if let Some(files) = layers_map.get(&depth) {
277            if !files.is_empty() {
278                layers.push(ReadingLayer {
279                    depth,
280                    label: layer_labels.get(depth).unwrap_or(&"Other").to_string(),
281                    files: files.clone(),
282                });
283            }
284        }
285    }
286
287    Ok(ReadingOrder { layers })
288}
289
290/// Gather project stats for the onboard page
291pub fn gather_project_stats(cache: &CacheManager, module_count: usize) -> Result<ProjectStats> {
292    let db_path = cache.path().join("meta.db");
293    let conn = Connection::open(&db_path)?;
294
295    let total_files: usize = conn.query_row("SELECT COUNT(*) FROM files", [], |r| r.get(0))?;
296    let total_lines: usize = conn.query_row("SELECT COALESCE(SUM(line_count), 0) FROM files", [], |r| r.get(0))?;
297
298    let mut stmt = conn.prepare(
299        "SELECT COALESCE(language, 'other'), COUNT(*) FROM files GROUP BY language ORDER BY COUNT(*) DESC LIMIT 10"
300    )?;
301    let languages: Vec<(String, usize)> = stmt.query_map([], |row| {
302        Ok((row.get::<_, String>(0)?, row.get::<_, usize>(1)?))
303    })?.filter_map(|r| r.ok()).collect();
304
305    Ok(ProjectStats {
306        total_files,
307        total_lines,
308        languages,
309        module_count,
310    })
311}
312
313/// Generate the full onboard data (structural phase)
314pub fn generate_onboard_structural(cache: &CacheManager, module_count: usize) -> Result<OnboardData> {
315    let entry_points = detect_entry_points(cache)?;
316    let reading_order = compute_reading_order(cache, &entry_points)?;
317    let project_stats = gather_project_stats(cache, module_count)?;
318
319    Ok(OnboardData {
320        entry_points,
321        reading_order,
322        project_stats,
323        narration: None,
324    })
325}
326
327/// Build structural context string for LLM narration
328pub fn build_onboard_context(data: &OnboardData) -> String {
329    let mut ctx = String::new();
330
331    ctx.push_str(&format!(
332        "Project size: {} files, {} lines across {} modules\n\n",
333        data.project_stats.total_files,
334        data.project_stats.total_lines,
335        data.project_stats.module_count,
336    ));
337
338    // Languages
339    ctx.push_str("Languages:\n");
340    for (lang, count) in &data.project_stats.languages {
341        ctx.push_str(&format!("- {}: {} files\n", lang, count));
342    }
343    ctx.push('\n');
344
345    // Entry points
346    ctx.push_str("Entry points:\n");
347    for ep in &data.entry_points {
348        ctx.push_str(&format!("- {} ({})", ep.path, ep.kind));
349        if !ep.key_symbols.is_empty() {
350            ctx.push_str(&format!(" — key symbols: {}", ep.key_symbols.join(", ")));
351        }
352        ctx.push('\n');
353    }
354    ctx.push('\n');
355
356    // Reading order
357    ctx.push_str("Suggested reading order (BFS from entry points through dependencies):\n");
358    for layer in &data.reading_order.layers {
359        ctx.push_str(&format!("Layer {} — {} ({} files):\n", layer.depth, layer.label, layer.files.len()));
360        for file in layer.files.iter().take(15) {
361            ctx.push_str(&format!("  - {}\n", file));
362        }
363        if layer.files.len() > 15 {
364            ctx.push_str(&format!("  ... and {} more\n", layer.files.len() - 15));
365        }
366    }
367
368    ctx
369}
370
371/// Render onboard data as markdown (structural content)
372pub fn render_onboard_markdown(data: &OnboardData) -> String {
373    let mut md = String::new();
374
375    // Narration (if available)
376    if let Some(ref narration) = data.narration {
377        md.push_str(narration);
378        md.push_str("\n\n");
379    }
380
381    // Quick stats
382    md.push_str("## At a Glance\n\n");
383    md.push_str(&format!(
384        "| Metric | Value |\n|---|---|\n| Files | {} |\n| Lines | {} |\n| Modules | {} |\n| Languages | {} |\n\n",
385        data.project_stats.total_files,
386        data.project_stats.total_lines,
387        data.project_stats.module_count,
388        data.project_stats.languages.len(),
389    ));
390
391    // Entry points table
392    md.push_str("## Entry Points\n\n");
393    md.push_str("These are the starting files — where execution begins or where the public API is exposed.\n\n");
394    md.push_str("| File | Kind | Key Symbols |\n|---|---|---|\n");
395    for ep in &data.entry_points {
396        let symbols = if ep.key_symbols.is_empty() {
397            "—".to_string()
398        } else {
399            ep.key_symbols.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
400        };
401        md.push_str(&format!("| `{}` | {} | {} |\n", ep.path, ep.kind, symbols));
402    }
403    md.push('\n');
404
405    // Reading order as Mermaid flowchart
406    if !data.reading_order.layers.is_empty() {
407        md.push_str("## Reading Order\n\n");
408        md.push_str("Start at the top and work your way down. Each layer depends on the one below it.\n\n");
409
410        md.push_str("{% mermaid() %}\nflowchart TD\n");
411        for layer in &data.reading_order.layers {
412            let node_id = format!("L{}", layer.depth);
413            let file_list: String = layer.files.iter().take(6)
414                .map(|f| {
415                    // Extract just the filename for readability
416                    Path::new(f).file_name()
417                        .and_then(|n| n.to_str())
418                        .unwrap_or(f)
419                })
420                .collect::<Vec<_>>()
421                .join(", ");
422            let suffix = if layer.files.len() > 6 {
423                format!(" +{} more", layer.files.len() - 6)
424            } else {
425                String::new()
426            };
427            md.push_str(&format!(
428                "    {}[\"{}: {}{}\"]\n",
429                node_id, layer.label, file_list, suffix
430            ));
431        }
432
433        // Connect layers top-to-bottom
434        for i in 0..data.reading_order.layers.len().saturating_sub(1) {
435            md.push_str(&format!("    L{} --> L{}\n", i, i + 1));
436        }
437
438        // Styling
439        md.push_str("    style L0 fill:#a78bfa,color:#0d0d0d,stroke:#a78bfa\n");
440        md.push_str("{% end %}\n\n");
441
442        // Detailed file lists per layer
443        for layer in &data.reading_order.layers {
444            md.push_str(&format!("### Layer {}: {}\n\n", layer.depth, layer.label));
445            for file in &layer.files {
446                md.push_str(&format!("- `{}`\n", file));
447            }
448            md.push('\n');
449        }
450    }
451
452    md
453}
454
#[cfg(test)]
mod tests {
    //! Unit tests for the pure (database-free) parts of the onboard guide:
    //! label formatting, LLM context building and markdown rendering.

    use super::*;

    /// Fixed stats fixture shared by the tests below.
    fn sample_stats() -> ProjectStats {
        ProjectStats {
            total_files: 100,
            total_lines: 5000,
            languages: vec![("Rust".to_string(), 80), ("Python".to_string(), 20)],
            module_count: 5,
        }
    }

    /// A single `src/main.rs` CLI entry point with one key symbol.
    fn main_entry_point() -> EntryPoint {
        EntryPoint {
            path: "src/main.rs".to_string(),
            kind: EntryPointKind::CliBinary,
            key_symbols: vec!["main".to_string()],
        }
    }

    /// Build a layer from borrowed strings.
    fn layer(depth: usize, label: &str, files: Vec<String>) -> ReadingLayer {
        ReadingLayer { depth, label: label.to_string(), files }
    }

    #[test]
    fn test_entry_point_kind_display() {
        // Every variant is covered, so a new or renamed label is noticed.
        assert_eq!(format!("{}", EntryPointKind::CliBinary), "CLI Binary");
        assert_eq!(format!("{}", EntryPointKind::HttpServer), "HTTP Server");
        assert_eq!(format!("{}", EntryPointKind::Library), "Library");
        assert_eq!(format!("{}", EntryPointKind::Script), "Script");
        assert_eq!(format!("{}", EntryPointKind::TestRunner), "Test Runner");
    }

    #[test]
    fn test_render_onboard_markdown_empty() {
        let data = OnboardData {
            entry_points: vec![],
            reading_order: ReadingOrder { layers: vec![] },
            project_stats: sample_stats(),
            narration: None,
        };
        let md = render_onboard_markdown(&data);
        assert!(md.contains("## At a Glance"));
        assert!(md.contains("| Files | 100 |"));
        assert!(md.contains("| Lines | 5000 |"));
        assert!(md.contains("| Languages | 2 |"));
        assert!(md.contains("## Entry Points"));
        // No layers means no reading-order section and no flowchart.
        assert!(!md.contains("## Reading Order"));
        assert!(!md.contains("flowchart"));
    }

    #[test]
    fn test_render_onboard_markdown_with_narration_and_layer() {
        let data = OnboardData {
            entry_points: vec![main_entry_point()],
            reading_order: ReadingOrder {
                layers: vec![layer(0, "Entry Points", vec!["src/main.rs".to_string()])],
            },
            project_stats: sample_stats(),
            narration: Some("Welcome aboard.".to_string()),
        };
        let md = render_onboard_markdown(&data);
        // Narration comes first, verbatim.
        assert!(md.starts_with("Welcome aboard.\n\n## At a Glance"));
        assert!(md.contains("| `src/main.rs` | CLI Binary | `main` |"));
        // The flowchart node shows only the file name, not the full path.
        assert!(md.contains("L0[\"Entry Points: main.rs\"]"));
        // A single layer has nothing to connect to.
        assert!(!md.contains("-->"));
        assert!(md.contains("### Layer 0: Entry Points\n\n- `src/main.rs`\n"));
    }

    #[test]
    fn test_build_onboard_context() {
        let data = OnboardData {
            entry_points: vec![main_entry_point()],
            reading_order: ReadingOrder {
                layers: vec![layer(0, "Entry Points", vec!["src/main.rs".to_string()])],
            },
            project_stats: sample_stats(),
            narration: None,
        };
        let ctx = build_onboard_context(&data);
        assert!(ctx.starts_with("Project size: 100 files, 5000 lines across 5 modules\n"));
        assert!(ctx.contains("- Rust: 80 files\n"));
        assert!(ctx.contains("src/main.rs"));
        assert!(ctx.contains("CLI Binary"));
        assert!(ctx.contains("— key symbols: main"));
        assert!(ctx.contains("Entry Points"));
    }

    #[test]
    fn test_build_onboard_context_truncates_long_layers() {
        let files: Vec<String> = (0..17).map(|i| format!("src/f{:02}.rs", i)).collect();
        let data = OnboardData {
            entry_points: vec![],
            reading_order: ReadingOrder {
                layers: vec![layer(1, "Direct Dependencies", files)],
            },
            project_stats: sample_stats(),
            narration: None,
        };
        let ctx = build_onboard_context(&data);
        assert!(ctx.contains("(17 files)"));
        // Only the first 15 files are listed; the rest are summarized.
        assert!(ctx.contains("src/f14.rs"));
        assert!(!ctx.contains("src/f15.rs"));
        assert!(ctx.ends_with("  ... and 2 more\n"));
    }
}