Skip to main content

reflex/pulse/
onboard.rs

1//! Onboard Guide: "Getting Started" page for developer onboarding
2//!
3//! Identifies entry points (main files, CLI handlers, API routes),
4//! suggests a reading order via dependency topology, and provides
5//! structural context for LLM narration.
6
7use anyhow::{Context, Result};
8use rusqlite::Connection;
9use rusqlite::OptionalExtension;
10use std::collections::{HashMap, HashSet, VecDeque};
11use std::path::Path;
12
13use crate::cache::CacheManager;
14use crate::models::{SearchResult, SymbolKind};
15
/// Category assigned to a file that was recognised as an entry point.
///
/// `detect_entry_points` picks the variant from the file name alone.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EntryPointKind {
    /// `main.*` and `cli.*` files.
    CliBinary,
    /// `server.*`, `app.*` and `routes.*` files.
    HttpServer,
    /// Shallow `lib.rs`, `mod.rs`, `index.*`, `__init__.py` and `mod.go` files.
    Library,
    /// Task-runner files such as `Makefile`, `Rakefile`, `Taskfile.yml`, `justfile`.
    Script,
    /// Test-runner configuration such as `conftest.py` or `jest.config.js`.
    TestRunner,
}
25
26impl std::fmt::Display for EntryPointKind {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match self {
29            EntryPointKind::CliBinary => write!(f, "CLI Binary"),
30            EntryPointKind::HttpServer => write!(f, "HTTP Server"),
31            EntryPointKind::Library => write!(f, "Library"),
32            EntryPointKind::Script => write!(f, "Script"),
33            EntryPointKind::TestRunner => write!(f, "Test Runner"),
34        }
35    }
36}
37
38/// A detected entry point in the codebase
39#[derive(Debug, Clone)]
40pub struct EntryPoint {
41    pub path: String,
42    pub kind: EntryPointKind,
43    pub key_symbols: Vec<String>,
44}
45
/// A group of files that sit at the same BFS distance from the entry points.
#[derive(Debug, Clone)]
pub struct ReadingLayer {
    /// BFS distance from the entry points (0 = the entry points themselves).
    pub depth: usize,
    /// Human-readable heading for the layer.
    pub label: String,
    /// Paths of the files that belong to this layer.
    pub files: Vec<String>,
}
53
54/// Complete reading order computed via BFS from entry points
55#[derive(Debug, Clone)]
56pub struct ReadingOrder {
57    pub layers: Vec<ReadingLayer>,
58}
59
60/// Full onboard data
61#[derive(Debug, Clone)]
62pub struct OnboardData {
63    pub entry_points: Vec<EntryPoint>,
64    pub reading_order: ReadingOrder,
65    pub project_stats: ProjectStats,
66    pub narration: Option<String>,
67}
68
/// Headline numbers shown on the onboard page.
#[derive(Debug, Clone)]
pub struct ProjectStats {
    /// Number of indexed files.
    pub total_files: usize,
    /// Sum of the line counts of all indexed files.
    pub total_lines: usize,
    /// `(language, file count)` pairs.
    pub languages: Vec<(String, usize)>,
    /// Number of modules, as supplied by the caller.
    pub module_count: usize,
}
77
78/// Detect entry points by matching well-known file patterns and names
79pub fn detect_entry_points(cache: &CacheManager) -> Result<Vec<EntryPoint>> {
80    let db_path = cache.path().join("meta.db");
81    let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
82
83    // Get all file paths
84    let mut stmt = conn.prepare("SELECT path FROM files ORDER BY path")?;
85    let paths: Vec<String> = stmt
86        .query_map([], |row| row.get(0))?
87        .filter_map(|r| r.ok())
88        .collect();
89
90    let mut entry_points = Vec::new();
91    let mut seen_paths = HashSet::new();
92
93    for path in &paths {
94        let filename = Path::new(path)
95            .file_name()
96            .and_then(|f| f.to_str())
97            .unwrap_or("");
98        let lower = filename.to_lowercase();
99
100        // CLI binary entry points
101        if matches!(
102            filename,
103            "main.rs" | "main.go" | "main.py" | "main.c" | "main.cpp" | "main.zig"
104        ) || (filename == "cli.rs"
105            || filename == "cli.ts"
106            || filename == "cli.py"
107            || filename == "cli.js")
108        {
109            if seen_paths.insert(path.clone()) {
110                let kind = EntryPointKind::CliBinary;
111                let symbols = extract_key_symbols_for_entry(&conn, path);
112                entry_points.push(EntryPoint {
113                    path: path.clone(),
114                    kind,
115                    key_symbols: symbols,
116                });
117            }
118            continue;
119        }
120
121        // HTTP server entry points
122        if matches!(
123            filename,
124            "server.rs"
125                | "server.ts"
126                | "server.js"
127                | "server.py"
128                | "server.go"
129                | "app.rs"
130                | "app.ts"
131                | "app.js"
132                | "app.py"
133                | "app.go"
134                | "routes.rs"
135                | "routes.ts"
136                | "routes.js"
137                | "routes.py"
138        ) {
139            if seen_paths.insert(path.clone()) {
140                let symbols = extract_key_symbols_for_entry(&conn, path);
141                entry_points.push(EntryPoint {
142                    path: path.clone(),
143                    kind: EntryPointKind::HttpServer,
144                    key_symbols: symbols,
145                });
146            }
147            continue;
148        }
149
150        // Library entry points
151        if matches!(
152            filename,
153            "lib.rs" | "mod.rs" | "index.ts" | "index.js" | "__init__.py" | "mod.go"
154        ) {
155            // Only include top-level or shallow lib/index files, not deeply nested ones
156            let depth = path.matches('/').count();
157            if depth <= 2 && seen_paths.insert(path.clone()) {
158                let symbols = extract_key_symbols_for_entry(&conn, path);
159                entry_points.push(EntryPoint {
160                    path: path.clone(),
161                    kind: EntryPointKind::Library,
162                    key_symbols: symbols,
163                });
164            }
165            continue;
166        }
167
168        // Script entry points (package.json scripts, Makefile, etc.)
169        if matches!(
170            filename,
171            "Makefile" | "Rakefile" | "Taskfile.yml" | "justfile"
172        ) {
173            if seen_paths.insert(path.clone()) {
174                entry_points.push(EntryPoint {
175                    path: path.clone(),
176                    kind: EntryPointKind::Script,
177                    key_symbols: vec![],
178                });
179            }
180            continue;
181        }
182
183        // Test runners
184        if matches!(
185            lower.as_str(),
186            "conftest.py"
187                | "jest.config.js"
188                | "jest.config.ts"
189                | "vitest.config.ts"
190                | "vitest.config.js"
191                | "pytest.ini"
192                | "setup.cfg"
193        ) && path.matches('/').count() <= 1
194        {
195            if seen_paths.insert(path.clone()) {
196                entry_points.push(EntryPoint {
197                    path: path.clone(),
198                    kind: EntryPointKind::TestRunner,
199                    key_symbols: vec![],
200                });
201            }
202        }
203    }
204
205    // Sort: CLI first, then HTTP, then Library, then others
206    entry_points.sort_by_key(|ep| match ep.kind {
207        EntryPointKind::CliBinary => 0,
208        EntryPointKind::HttpServer => 1,
209        EntryPointKind::Library => 2,
210        EntryPointKind::Script => 3,
211        EntryPointKind::TestRunner => 4,
212    });
213
214    Ok(entry_points)
215}
216
217/// Extract key symbol names for an entry point file from the symbol cache.
218///
219/// Queries the `symbols` table which stores all symbols for a file as a
220/// serialized JSON blob (`symbols_json` column containing `Vec<SearchResult>`).
221fn extract_key_symbols_for_entry(conn: &Connection, path: &str) -> Vec<String> {
222    // Get file_id
223    let file_id: Option<i64> = conn
224        .query_row("SELECT id FROM files WHERE path = ?1", [path], |row| {
225            row.get(0)
226        })
227        .ok();
228
229    let Some(file_id) = file_id else {
230        return vec![];
231    };
232
233    // Query the symbols table for this file's serialized symbols
234    let symbols_json: Option<String> = conn
235        .query_row(
236            "SELECT symbols_json FROM symbols WHERE file_id = ?1",
237            [file_id],
238            |row| row.get(0),
239        )
240        .optional()
241        .ok()
242        .flatten();
243
244    let Some(json) = symbols_json else {
245        return vec![];
246    };
247
248    // Deserialize and filter to key symbol kinds
249    let symbols: Vec<SearchResult> = match serde_json::from_str(&json) {
250        Ok(s) => s,
251        Err(_) => return vec![],
252    };
253
254    symbols
255        .iter()
256        .filter(|sr| {
257            matches!(
258                sr.kind,
259                SymbolKind::Function
260                    | SymbolKind::Struct
261                    | SymbolKind::Class
262                    | SymbolKind::Trait
263                    | SymbolKind::Interface
264            )
265        })
266        .filter_map(|sr| sr.symbol.clone())
267        .take(8)
268        .collect()
269}
270
271/// Compute reading order via BFS from entry points through the dependency graph
272pub fn compute_reading_order(
273    cache: &CacheManager,
274    entry_points: &[EntryPoint],
275) -> Result<ReadingOrder> {
276    let db_path = cache.path().join("meta.db");
277    let conn = Connection::open(&db_path)?;
278
279    // Build adjacency list: file_id -> [dependent file_ids]
280    // We traverse in the direction entry_point -> its dependencies
281    let mut deps: HashMap<i64, Vec<i64>> = HashMap::new();
282    let mut path_to_id: HashMap<String, i64> = HashMap::new();
283    let mut id_to_path: HashMap<i64, String> = HashMap::new();
284
285    // Load file id mappings
286    let mut stmt = conn.prepare("SELECT id, path FROM files")?;
287    let rows = stmt.query_map([], |row| {
288        Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
289    })?;
290    for row in rows.flatten() {
291        path_to_id.insert(row.1.clone(), row.0);
292        id_to_path.insert(row.0, row.1);
293    }
294
295    // Load dependency edges (file -> its dependency)
296    let mut stmt = conn.prepare(
297        "SELECT file_id, resolved_file_id FROM file_dependencies WHERE resolved_file_id IS NOT NULL"
298    )?;
299    let edges = stmt.query_map([], |row| Ok((row.get::<_, i64>(0)?, row.get::<_, i64>(1)?)))?;
300    for edge in edges.flatten() {
301        deps.entry(edge.0).or_default().push(edge.1);
302    }
303
304    // BFS from entry points
305    let mut visited: HashSet<i64> = HashSet::new();
306    let mut queue: VecDeque<(i64, usize)> = VecDeque::new();
307    let mut layers_map: HashMap<usize, Vec<String>> = HashMap::new();
308
309    for ep in entry_points {
310        if let Some(&file_id) = path_to_id.get(&ep.path) {
311            if visited.insert(file_id) {
312                queue.push_back((file_id, 0));
313            }
314        }
315    }
316
317    while let Some((file_id, depth)) = queue.pop_front() {
318        if depth > 5 {
319            continue;
320        } // Cap depth to keep reading order manageable
321
322        if let Some(path) = id_to_path.get(&file_id) {
323            layers_map.entry(depth).or_default().push(path.clone());
324        }
325
326        if let Some(dep_ids) = deps.get(&file_id) {
327            for &dep_id in dep_ids {
328                if visited.insert(dep_id) {
329                    queue.push_back((dep_id, depth + 1));
330                }
331            }
332        }
333    }
334
335    let layer_labels = [
336        "Entry Points",
337        "Direct Dependencies",
338        "Core Infrastructure",
339        "Supporting Modules",
340        "Deep Dependencies",
341        "Periphery",
342    ];
343
344    let mut layers: Vec<ReadingLayer> = Vec::new();
345    for depth in 0..=5 {
346        if let Some(files) = layers_map.get(&depth) {
347            if !files.is_empty() {
348                layers.push(ReadingLayer {
349                    depth,
350                    label: layer_labels.get(depth).unwrap_or(&"Other").to_string(),
351                    files: files.clone(),
352                });
353            }
354        }
355    }
356
357    Ok(ReadingOrder { layers })
358}
359
360/// Gather project stats for the onboard page
361pub fn gather_project_stats(cache: &CacheManager, module_count: usize) -> Result<ProjectStats> {
362    let db_path = cache.path().join("meta.db");
363    let conn = Connection::open(&db_path)?;
364
365    let total_files: usize = conn.query_row("SELECT COUNT(*) FROM files", [], |r| r.get(0))?;
366    let total_lines: usize =
367        conn.query_row("SELECT COALESCE(SUM(line_count), 0) FROM files", [], |r| {
368            r.get(0)
369        })?;
370
371    let mut stmt = conn.prepare(
372        "SELECT COALESCE(language, 'other'), COUNT(*) FROM files GROUP BY language ORDER BY COUNT(*) DESC LIMIT 10"
373    )?;
374    let languages: Vec<(String, usize)> = stmt
375        .query_map([], |row| {
376            Ok((row.get::<_, String>(0)?, row.get::<_, usize>(1)?))
377        })?
378        .filter_map(|r| r.ok())
379        .collect();
380
381    Ok(ProjectStats {
382        total_files,
383        total_lines,
384        languages,
385        module_count,
386    })
387}
388
389/// Generate the full onboard data (structural phase)
390pub fn generate_onboard_structural(
391    cache: &CacheManager,
392    module_count: usize,
393) -> Result<OnboardData> {
394    let entry_points = detect_entry_points(cache)?;
395    let reading_order = compute_reading_order(cache, &entry_points)?;
396    let project_stats = gather_project_stats(cache, module_count)?;
397
398    Ok(OnboardData {
399        entry_points,
400        reading_order,
401        project_stats,
402        narration: None,
403    })
404}
405
406/// Build structural context string for LLM narration
407pub fn build_onboard_context(data: &OnboardData) -> String {
408    let mut ctx = String::new();
409
410    ctx.push_str(&format!(
411        "Project size: {} files, {} lines across {} modules\n\n",
412        data.project_stats.total_files,
413        data.project_stats.total_lines,
414        data.project_stats.module_count,
415    ));
416
417    // Languages
418    ctx.push_str("Languages:\n");
419    for (lang, count) in &data.project_stats.languages {
420        ctx.push_str(&format!("- {}: {} files\n", lang, count));
421    }
422    ctx.push('\n');
423
424    // Entry points
425    ctx.push_str("Entry points:\n");
426    for ep in &data.entry_points {
427        ctx.push_str(&format!("- {} ({})", ep.path, ep.kind));
428        if !ep.key_symbols.is_empty() {
429            ctx.push_str(&format!(" — key symbols: {}", ep.key_symbols.join(", ")));
430        }
431        ctx.push('\n');
432    }
433    ctx.push('\n');
434
435    // Reading order
436    ctx.push_str("Suggested reading order (BFS from entry points through dependencies):\n");
437    for layer in &data.reading_order.layers {
438        ctx.push_str(&format!(
439            "Layer {} — {} ({} files):\n",
440            layer.depth,
441            layer.label,
442            layer.files.len()
443        ));
444        for file in layer.files.iter().take(15) {
445            ctx.push_str(&format!("  - {}\n", file));
446        }
447        if layer.files.len() > 15 {
448            ctx.push_str(&format!("  ... and {} more\n", layer.files.len() - 15));
449        }
450    }
451
452    ctx
453}
454
455/// Render onboard data as markdown (structural content)
456pub fn render_onboard_markdown(data: &OnboardData) -> String {
457    let mut md = String::new();
458
459    // Narration (if available)
460    if let Some(ref narration) = data.narration {
461        md.push_str(narration);
462        md.push_str("\n\n");
463    }
464
465    // Quick stats
466    md.push_str("## At a Glance\n\n");
467    md.push_str(&format!(
468        "| Metric | Value |\n|---|---|\n| Files | {} |\n| Lines | {} |\n| Modules | {} |\n| Languages | {} |\n\n",
469        data.project_stats.total_files,
470        data.project_stats.total_lines,
471        data.project_stats.module_count,
472        data.project_stats.languages.len(),
473    ));
474
475    // Entry points table
476    md.push_str("## Entry Points\n\n");
477    md.push_str("These are the starting files — where execution begins or where the public API is exposed.\n\n");
478    md.push_str("| File | Kind | Key Symbols |\n|---|---|---|\n");
479    for ep in &data.entry_points {
480        let symbols = if ep.key_symbols.is_empty() {
481            "—".to_string()
482        } else {
483            ep.key_symbols
484                .iter()
485                .map(|s| format!("`{}`", s))
486                .collect::<Vec<_>>()
487                .join(", ")
488        };
489        md.push_str(&format!("| `{}` | {} | {} |\n", ep.path, ep.kind, symbols));
490    }
491    md.push('\n');
492
493    // Reading order as Mermaid flowchart
494    if !data.reading_order.layers.is_empty() {
495        md.push_str("## Reading Order\n\n");
496        md.push_str(
497            "Start at the top and work your way down. Each layer depends on the one below it.\n\n",
498        );
499
500        md.push_str("{% mermaid() %}\nflowchart TD\n");
501        for layer in &data.reading_order.layers {
502            let node_id = format!("L{}", layer.depth);
503            let file_list: String = layer
504                .files
505                .iter()
506                .take(6)
507                .map(|f| {
508                    // Extract just the filename for readability
509                    Path::new(f)
510                        .file_name()
511                        .and_then(|n| n.to_str())
512                        .unwrap_or(f)
513                })
514                .collect::<Vec<_>>()
515                .join(", ");
516            let suffix = if layer.files.len() > 6 {
517                format!(" +{} more", layer.files.len() - 6)
518            } else {
519                String::new()
520            };
521            md.push_str(&format!(
522                "    {}[\"{}: {}{}\"]\n",
523                node_id, layer.label, file_list, suffix
524            ));
525        }
526
527        // Connect layers top-to-bottom
528        for i in 0..data.reading_order.layers.len().saturating_sub(1) {
529            md.push_str(&format!("    L{} --> L{}\n", i, i + 1));
530        }
531
532        // Styling
533        md.push_str("    style L0 fill:#a78bfa,color:#0d0d0d,stroke:#a78bfa\n");
534        md.push_str("{% end %}\n\n");
535
536        // Detailed file lists per layer
537        for layer in &data.reading_order.layers {
538            md.push_str(&format!("### Layer {}: {}\n\n", layer.depth, layer.label));
539            for file in &layer.files {
540                md.push_str(&format!("- `{}`\n", file));
541            }
542            md.push('\n');
543        }
544    }
545
546    md
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a small fixture with one CLI entry point and the given layers.
    fn sample_data(layers: Vec<ReadingLayer>) -> OnboardData {
        OnboardData {
            entry_points: vec![EntryPoint {
                path: "src/main.rs".to_string(),
                kind: EntryPointKind::CliBinary,
                key_symbols: vec!["main".to_string()],
            }],
            reading_order: ReadingOrder { layers },
            project_stats: ProjectStats {
                total_files: 50,
                total_lines: 3000,
                languages: vec![("Rust".to_string(), 50)],
                module_count: 3,
            },
            narration: None,
        }
    }

    /// Every variant must map to its human-readable label.
    #[test]
    fn test_entry_point_kind_display() {
        assert_eq!(format!("{}", EntryPointKind::CliBinary), "CLI Binary");
        assert_eq!(format!("{}", EntryPointKind::HttpServer), "HTTP Server");
        assert_eq!(format!("{}", EntryPointKind::Library), "Library");
        assert_eq!(format!("{}", EntryPointKind::Script), "Script");
        assert_eq!(format!("{}", EntryPointKind::TestRunner), "Test Runner");
    }

    /// Without layers only the stats and entry-point sections are rendered.
    #[test]
    fn test_render_onboard_markdown_empty() {
        let data = OnboardData {
            entry_points: vec![],
            reading_order: ReadingOrder { layers: vec![] },
            project_stats: ProjectStats {
                total_files: 100,
                total_lines: 5000,
                languages: vec![("Rust".to_string(), 80), ("Python".to_string(), 20)],
                module_count: 5,
            },
            narration: None,
        };
        let md = render_onboard_markdown(&data);
        assert!(md.contains("## At a Glance"));
        assert!(md.contains("100"));
        assert!(md.contains("5000"));
        assert!(md.contains("## Entry Points"));
        assert!(!md.contains("## Reading Order"));
    }

    /// With layers the flowchart nodes, the edge and the file lists appear.
    #[test]
    fn test_render_onboard_markdown_with_layers() {
        let data = sample_data(vec![
            ReadingLayer {
                depth: 0,
                label: "Entry Points".to_string(),
                files: vec!["src/main.rs".to_string()],
            },
            ReadingLayer {
                depth: 1,
                label: "Direct Dependencies".to_string(),
                files: vec!["src/cache.rs".to_string()],
            },
        ]);
        let md = render_onboard_markdown(&data);
        assert!(md.contains("## Reading Order"));
        assert!(md.contains("L0[\"Entry Points: main.rs\"]"));
        assert!(md.contains("L1[\"Direct Dependencies: cache.rs\"]"));
        assert!(md.contains("L0 --> L1"));
        assert!(md.contains("### Layer 1: Direct Dependencies"));
        assert!(md.contains("- `src/cache.rs`"));
        assert!(md.contains("| `src/main.rs` | CLI Binary | `main` |"));
    }

    /// The LLM context names the entry point, its kind and the layer label.
    #[test]
    fn test_build_onboard_context() {
        let data = sample_data(vec![ReadingLayer {
            depth: 0,
            label: "Entry Points".to_string(),
            files: vec!["src/main.rs".to_string()],
        }]);
        let ctx = build_onboard_context(&data);
        assert!(ctx.contains("src/main.rs"));
        assert!(ctx.contains("CLI Binary"));
        assert!(ctx.contains("Entry Points"));
    }
}