Skip to main content

oxdraw/
codemap.rs

1use anyhow::{Context, Result, anyhow, bail};
2use directories::ProjectDirs;
3use serde::{Deserialize, Serialize};
4use std::collections::hash_map::DefaultHasher;
5use std::collections::{HashMap, HashSet};
6use std::fs;
7use std::hash::{Hash, Hasher};
8use std::path::{Path, PathBuf};
9use std::process::Command;
10use walkdir::WalkDir;
11
12use crate::Diagram;
13
14#[derive(Debug, Serialize, Deserialize, Clone)]
15pub struct CodeMapMapping {
16    pub nodes: HashMap<String, CodeLocation>,
17}
18
19#[derive(Debug, Serialize, Deserialize, Clone)]
20pub struct CodeLocation {
21    pub file: String,
22    pub start_line: Option<usize>,
23    pub end_line: Option<usize>,
24    pub symbol: Option<String>,
25}
26
27#[derive(Debug, Serialize, Deserialize)]
28struct LlmResponse {
29    mermaid: String,
30    mapping: HashMap<String, CodeLocation>,
31}
32
33#[derive(Debug, Serialize, Deserialize)]
34struct CacheEntry {
35    commit: String,
36    diff_hash: u64,
37    mermaid: String,
38    mapping: CodeMapMapping,
39}
40
41#[derive(Debug, Serialize, Deserialize, Clone)]
42pub struct CodeMapMetadata {
43    pub path: Option<String>,
44    pub commit: Option<String>,
45    pub diff_hash: Option<u64>,
46}
47
48pub async fn generate_code_map(
49    path: &Path,
50    api_key: Option<String>,
51    model: Option<String>,
52    api_url: Option<String>,
53    regen: bool,
54    custom_prompt: Option<String>,
55    no_ai: bool,
56    max_nodes: usize,
57    gemini_key: Option<String>,
58) -> Result<(String, CodeMapMapping)> {
59    let git_info = get_git_info(path);
60
61    let project_dirs = ProjectDirs::from("", "", "oxdraw")
62        .ok_or_else(|| anyhow!("Could not determine config directory"))?;
63    let config_dir = project_dirs.config_dir();
64    fs::create_dir_all(config_dir).context("Failed to create config directory")?;
65
66    let abs_path = fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
67    let mut hasher = DefaultHasher::new();
68    abs_path.hash(&mut hasher);
69    let path_hash = hasher.finish();
70    let cache_path = config_dir.join(format!("cache_{:x}.json", path_hash));
71
72    if !regen {
73        if let Some((commit, diff_hash, _)) = &git_info {
74            if let Ok(cache_content) = fs::read_to_string(&cache_path) {
75                if let Ok(cache) = serde_json::from_str::<CacheEntry>(&cache_content) {
76                    if cache.commit == *commit && cache.diff_hash == *diff_hash {
77                        println!(
78                            "Using cached code map for commit {} (diff hash: {:x})",
79                            commit, diff_hash
80                        );
81                        return Ok((cache.mermaid, cache.mapping));
82                    }
83                }
84            }
85        }
86    }
87
88    if no_ai {
89        println!("Generating deterministic code map (no AI)...");
90        let (mermaid, mapping) = generate_deterministic_map(path, max_nodes)?;
91
92        // Cache the result
93        if let Some((commit, diff_hash, _)) = git_info {
94            let cache_entry = CacheEntry {
95                commit,
96                diff_hash,
97                mermaid: mermaid.clone(),
98                mapping: CodeMapMapping {
99                    nodes: mapping.nodes.clone(),
100                },
101            };
102            if let Ok(json) = serde_json::to_string_pretty(&cache_entry) {
103                let _ = fs::write(cache_path, json);
104            }
105        }
106        return Ok((mermaid, mapping));
107    }
108
109    println!("Scanning codebase at {}...", path.display());
110    let (file_summaries, granularity) = scan_codebase(path)?;
111
112    println!(
113        "Found {} files. Generating code map...",
114        file_summaries.len()
115    );
116
117    let base_prompt = match granularity {
118        Granularity::File => "You are an expert software engineer. Analyze the following source file and generate a Mermaid flowchart that explains its internal logic, control flow, and structure.
119
120        For each node in the diagram, you MUST provide a mapping to the specific code location that the node represents.
121        Prefer using symbol names (functions, classes, structs, etc.) over line numbers when possible, as line numbers are brittle.
122        IMPORTANT: The keys in the 'mapping' object MUST match exactly the node IDs used in the Mermaid diagram.",
123
124        Granularity::Directory => "You are an expert software architect. Analyze the files in the following directory and generate a Mermaid flowchart that explains the relationships and data flow between them.
125
126        For each node in the diagram, you MUST provide a mapping to the specific code location that the node represents.
127        Prefer using symbol names (functions, classes, structs, etc.) over line numbers when possible, as line numbers are brittle.
128        IMPORTANT: The keys in the 'mapping' object MUST match exactly the node IDs used in the Mermaid diagram.",
129
130        Granularity::Repo => "You are an expert software architect. Analyze the following codebase and generate a Mermaid flowchart that explains the high-level architecture and data flow.
131
132        For each node in the diagram, you MUST provide a mapping to the specific code location that the node represents.
133        Prefer using symbol names (functions, classes, structs, etc.) over line numbers when possible, as line numbers are brittle.
134        IMPORTANT: The keys in the 'mapping' object MUST match exactly the node IDs used in the Mermaid diagram.",
135    };
136
137    let mut prompt = format!(
138        "{}
139
140        Return ONLY a JSON object with the following structure. Do not include other components of mermaid syntax such as as style This is the JSON schema to follow:
141        {{
142            \"mermaid\": \"graph TD\\n    A[Node Label] --> B[Another Node]\",
143            \"mapping\": {{
144                \"A\": {{ \"file\": \"src/main.rs\", \"symbol\": \"main\", \"start_line\": 10, \"end_line\": 20 }},
145                \"B\": {{ \"file\": \"src/lib.rs\", \"symbol\": \"MyStruct\", \"start_line\": 5, \"end_line\": 15 }}
146            }}
147        }}
148        ", base_prompt
149    );
150
151    if let Some(custom) = custom_prompt {
152        prompt.push_str(&format!("\n\nUser Instructions:\n{}\n", custom));
153    }
154
155    prompt.push_str(&format!(
156        "\n\nHere are the files:\n\n{}",
157        file_summaries.join("\n\n")
158    ));
159
160    let client = reqwest::Client::builder()
161        .timeout(std::time::Duration::from_secs(120))
162        .build()?;
163
164    let (url, model) = if let Some(key) = &gemini_key {
165        let model = model.unwrap_or_else(|| "gemini-2.0-flash".to_string());
166        (
167            format!(
168                "https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent?key={}",
169                model, key
170            ),
171            model,
172        )
173    } else {
174        (
175            api_url.unwrap_or_else(|| "http://localhost:8080/v1/responses".to_string()),
176            model.unwrap_or_else(|| "gemini-2.0-flash".to_string()),
177        )
178    };
179
180    let mut attempts = 0;
181    const MAX_ATTEMPTS: usize = 4;
182
183    loop {
184        attempts += 1;
185        if attempts > MAX_ATTEMPTS {
186            bail!(
187                "Failed to generate valid code map after {} attempts",
188                MAX_ATTEMPTS
189            );
190        }
191
192        if attempts > 1 {
193            println!("Attempt {}/{}...", attempts, MAX_ATTEMPTS);
194        }
195
196        let mut request = client.post(&url);
197
198        if gemini_key.is_some() {
199            let body = serde_json::json!({
200                "contents": [{
201                    "parts": [{
202                        "text": prompt
203                    }]
204                }]
205            });
206            request = request.json(&body);
207        } else {
208            let mut body = HashMap::new();
209            body.insert("model", model.clone());
210            body.insert("input", prompt.clone());
211            request = request.json(&body);
212
213            if let Some(key) = &api_key {
214                request = request.header("Authorization", format!("Bearer {}", key));
215            }
216        }
217
218        let response = request
219            .send()
220            .await
221            .context("Failed to send request to LLM")?;
222
223        if !response.status().is_success() {
224            let text = response.text().await?;
225            return Err(anyhow!("LLM API returned error: {}", text));
226        }
227
228        let response_json: serde_json::Value = response
229            .json()
230            .await
231            .context("Failed to parse LLM response JSON")?;
232
233        // Try to extract text from different possible formats
234        let output_text = if let Some(text) =
235            response_json.get("output_text").and_then(|v| v.as_str())
236        {
237            text.to_string()
238        } else if let Some(candidates) = response_json.get("candidates").and_then(|v| v.as_array())
239        {
240            // Gemini format
241            candidates
242                .first()
243                .and_then(|c| c.get("content"))
244                .and_then(|c| c.get("parts"))
245                .and_then(|p| p.as_array())
246                .and_then(|p| p.first())
247                .and_then(|p| p.get("text"))
248                .and_then(|t| t.as_str())
249                .ok_or_else(|| anyhow!("Could not find content in Gemini response"))?
250                .to_string()
251        } else if let Some(choices) = response_json.get("choices").and_then(|v| v.as_array()) {
252            // Standard OpenAI format
253            choices
254                .first()
255                .and_then(|c| c.get("message"))
256                .and_then(|m| m.get("content"))
257                .and_then(|c| c.as_str())
258                .ok_or_else(|| anyhow!("Could not find content in OpenAI response"))?
259                .to_string()
260        } else {
261            // Fallback for the custom format
262            if let Some(output) = response_json.get("output").and_then(|v| v.as_array()) {
263                if let Some(first) = output.first() {
264                    if let Some(content) = first.get("content").and_then(|v| v.as_array()) {
265                        if let Some(first_content) = content.first() {
266                            if let Some(text) = first_content.get("text").and_then(|v| v.as_str()) {
267                                text.to_string()
268                            } else {
269                                return Err(anyhow!("Unknown response format (deep nested)"));
270                            }
271                        } else {
272                            return Err(anyhow!("Unknown response format (empty content)"));
273                        }
274                    } else {
275                        return Err(anyhow!("Unknown response format (no content array)"));
276                    }
277                } else {
278                    return Err(anyhow!("Unknown response format (empty output)"));
279                }
280            } else {
281                return Err(anyhow!("Unknown response format: {:?}", response_json));
282            }
283        };
284
285        // Clean up the output text (remove markdown code blocks if present)
286        let clean_json = output_text
287            .trim()
288            .trim_start_matches("```json")
289            .trim_start_matches("```")
290            .trim_end_matches("```")
291            .trim();
292
293        let result: LlmResponse = match serde_json::from_str(clean_json) {
294            Ok(r) => r,
295            Err(e) => {
296                println!("Failed to parse JSON: {}", e);
297                prompt.push_str(&format!("\n\nYour previous response was not valid JSON: {}. Please return ONLY valid JSON.", e));
298                continue;
299            }
300        };
301
302        // Validate the result
303        match validate_response(&result) {
304            Ok(_) => {
305                // Save to cache if we have git info
306                if let Some((commit, diff_hash, _)) = git_info {
307                    let cache_entry = CacheEntry {
308                        commit,
309                        diff_hash,
310                        mermaid: result.mermaid.clone(),
311                        mapping: CodeMapMapping {
312                            nodes: result.mapping.clone(),
313                        },
314                    };
315                    if let Ok(json) = serde_json::to_string_pretty(&cache_entry) {
316                        let _ = fs::write(cache_path, json);
317                    }
318                }
319                return Ok((
320                    result.mermaid,
321                    CodeMapMapping {
322                        nodes: result.mapping,
323                    },
324                ));
325            }
326            Err(e) => {
327                println!("Validation failed: {}", e);
328                prompt.push_str(&format!("\n\nYour previous response failed validation: {}. Please fix the diagram and mapping.", e));
329                continue;
330            }
331        }
332    }
333}
334
335fn validate_response(response: &LlmResponse) -> Result<()> {
336    // 1. Parse Mermaid
337    let diagram =
338        Diagram::parse(&response.mermaid).context("Failed to parse generated Mermaid diagram")?;
339
340    // 2. Check Mapping Completeness
341    for node_id in diagram.nodes.keys() {
342        if !response.mapping.contains_key(node_id) {
343            bail!(
344                "Node '{}' is present in the diagram but missing from the mapping object.",
345                node_id
346            );
347        }
348    }
349
350    // 3. Check for Isolated Nodes (if more than 1 node)
351    if diagram.nodes.len() > 1 {
352        let mut connected_nodes = HashSet::new();
353        for edge in &diagram.edges {
354            connected_nodes.insert(&edge.from);
355            connected_nodes.insert(&edge.to);
356        }
357
358        for node_id in diagram.nodes.keys() {
359            if !connected_nodes.contains(node_id) {
360                bail!(
361                    "Node '{}' is isolated (not connected to any other node). All nodes must be connected.",
362                    node_id
363                );
364            }
365        }
366    }
367
368    Ok(())
369}
370
/// Collects the git state used to key the code map cache.
///
/// Returns `(commit, diff_hash, repo_root)` where `commit` is the output of
/// `git rev-parse HEAD`, `diff_hash` is a hash of the `git diff HEAD` output and `repo_root`
/// is the output of `git rev-parse --show-toplevel`.
///
/// `path` may be a directory or a file; for a file the commands run in its parent directory
/// (a process cannot be started with a file as its working directory).
///
/// Returns `None` when git cannot be run or any of the commands fails, e.g. because `path`
/// is not inside a git repository.
pub fn get_git_info(path: &Path) -> Option<(String, u64, PathBuf)> {
    let work_dir: &Path = if path.is_file() {
        match path.parent() {
            // A bare relative file name has an empty parent; that means the current directory.
            Some(parent) if !parent.as_os_str().is_empty() => parent,
            _ => Path::new("."),
        }
    } else {
        path
    };

    let root_raw = run_git_capture(work_dir, &["rev-parse", "--show-toplevel"])?;
    let root_path = PathBuf::from(String::from_utf8_lossy(&root_raw).trim().to_string());

    let commit_raw = run_git_capture(work_dir, &["rev-parse", "HEAD"])?;
    let commit = String::from_utf8_lossy(&commit_raw).trim().to_string();

    // Uncommitted changes are fingerprinted by hashing the raw diff against HEAD.
    let diff_raw = run_git_capture(work_dir, &["diff", "HEAD"])?;
    let mut hasher = DefaultHasher::new();
    diff_raw.hash(&mut hasher);
    let diff_hash = hasher.finish();

    Some((commit, diff_hash, root_path))
}

/// Runs `git <args>` in `dir` and returns its stdout, or `None` when the process cannot be
/// started or exits with a failure status.
fn run_git_capture(dir: &Path, args: &[&str]) -> Option<Vec<u8>> {
    let output = Command::new("git")
        .args(args)
        .current_dir(dir)
        .output()
        .ok()?;
    if output.status.success() {
        Some(output.stdout)
    } else {
        None
    }
}
413
/// Scope of a code map request; selects which base prompt is sent to the LLM.
#[derive(PartialEq, Debug)]
enum Granularity {
    /// The scanned root contains a `.git` entry.
    Repo,
    /// The scanned root is a directory without a `.git` entry.
    Directory,
    /// The scanned root is a single readable file.
    File,
}
420
421fn scan_codebase(root_path: &Path) -> Result<(Vec<String>, Granularity)> {
422    let mut summaries = Vec::new();
423    let mut total_chars = 0;
424    const MAX_TOTAL_CHARS: usize = 100_000; // Limit total context size
425
426    if root_path.is_file() {
427        if let Ok(content) = fs::read_to_string(root_path) {
428            let file_name = root_path.file_name().unwrap_or_default().to_string_lossy();
429            summaries.push(format!("File: {}\n```\n{}\n```", file_name, content));
430            return Ok((summaries, Granularity::File));
431        }
432    }
433
434    // Basic ignore list
435    let include_exts = vec![
436        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cpp", "h",
437    ];
438    let ignore_dirs = vec![
439        "target",
440        "node_modules",
441        ".git",
442        "dist",
443        "build",
444        ".next",
445        "out",
446    ];
447
448    let walker = WalkDir::new(root_path).into_iter();
449
450    for entry in walker.filter_entry(|e| {
451        let file_name = e.file_name().to_string_lossy();
452        !ignore_dirs.iter().any(|d| file_name == *d)
453    }) {
454        let entry = entry?;
455        let path = entry.path();
456
457        if path.is_dir() {
458            continue;
459        }
460
461        if let Some(ext) = path.extension().and_then(|s| s.to_str()) {
462            if include_exts.contains(&ext) {
463                if let Ok(content) = fs::read_to_string(path) {
464                    // Truncate if too large
465                    let truncated = if content.len() > 10000 {
466                        format!("{}... (truncated)", &content[..10000])
467                    } else {
468                        content
469                    };
470
471                    if total_chars + truncated.len() > MAX_TOTAL_CHARS {
472                        break; // Stop if we exceed the budget
473                    }
474
475                    total_chars += truncated.len();
476
477                    // Get relative path
478                    let rel_path = path
479                        .strip_prefix(root_path)
480                        .unwrap_or(path)
481                        .to_string_lossy();
482                    summaries.push(format!("File: {}\n```\n{}\n```", rel_path, truncated));
483                }
484            }
485        }
486    }
487
488    // Determine if it's a repo or just a directory
489    let granularity = if root_path.join(".git").exists() {
490        Granularity::Repo
491    } else {
492        Granularity::Directory
493    };
494
495    Ok((summaries, granularity))
496}
497
498pub fn extract_code_mappings(source: &str) -> (CodeMapMapping, CodeMapMetadata) {
499    let mut nodes = HashMap::new();
500    let mut metadata = CodeMapMetadata {
501        path: None,
502        commit: None,
503        diff_hash: None,
504    };
505
506    for line in source.lines() {
507        let trimmed = line.trim();
508        if trimmed.starts_with("%% OXDRAW CODE") {
509            // Parse: %% OXDRAW CODE <NodeID> <FilePath> [line:<Start>-<End>] [def:<Symbol>]
510            let parts: Vec<&str> = trimmed.split_whitespace().collect();
511            if parts.len() >= 4 {
512                let node_id = parts[3].to_string();
513                let file_path = parts[4].to_string();
514                let mut start_line = None;
515                let mut end_line = None;
516                let mut symbol = None;
517
518                for part in parts.iter().skip(5) {
519                    if let Some(range) = part.strip_prefix("line:") {
520                        if let Some((start, end)) = range.split_once('-') {
521                            start_line = start.parse().ok();
522                            end_line = end.parse().ok();
523                        }
524                    } else if let Some(sym) = part.strip_prefix("def:") {
525                        symbol = Some(sym.to_string());
526                    }
527                }
528
529                nodes.insert(
530                    node_id,
531                    CodeLocation {
532                        file: file_path,
533                        start_line,
534                        end_line,
535                        symbol,
536                    },
537                );
538            }
539        } else if trimmed.starts_with("%% OXDRAW META") {
540            // Parse: %% OXDRAW META path:<Path> commit:<Commit> diff_hash:<Hash>
541            let parts: Vec<&str> = trimmed.split_whitespace().collect();
542            for part in parts.iter().skip(3) {
543                // Skip "%%", "OXDRAW", "META"
544                if let Some(val) = part.strip_prefix("path:") {
545                    metadata.path = Some(val.to_string());
546                } else if let Some(val) = part.strip_prefix("commit:") {
547                    metadata.commit = Some(val.to_string());
548                } else if let Some(val) = part.strip_prefix("diff_hash:") {
549                    metadata.diff_hash = val.parse().ok();
550                }
551            }
552        }
553    }
554    (CodeMapMapping { nodes }, metadata)
555}
556
557pub fn serialize_codemap(
558    mermaid: &str,
559    mapping: &CodeMapMapping,
560    metadata: &CodeMapMetadata,
561) -> String {
562    let mut output = mermaid.to_string();
563    if !output.ends_with('\n') {
564        output.push('\n');
565    }
566    output.push_str("\n");
567
568    for (node_id, location) in &mapping.nodes {
569        let mut parts = Vec::new();
570        if let (Some(start), Some(end)) = (location.start_line, location.end_line) {
571            parts.push(format!("line:{}-{}", start, end));
572        }
573        if let Some(symbol) = &location.symbol {
574            parts.push(format!("def:{}", symbol));
575        }
576
577        let extra = if parts.is_empty() {
578            String::new()
579        } else {
580            format!(" {}", parts.join(" "))
581        };
582
583        output.push_str(&format!(
584            "%% OXDRAW CODE {} {}{}\n",
585            node_id, location.file, extra
586        ));
587    }
588
589    let mut meta_line = String::from("%% OXDRAW META");
590    if let Some(path) = &metadata.path {
591        meta_line.push_str(&format!(" path:{}", path));
592    }
593    if let Some(commit) = &metadata.commit {
594        meta_line.push_str(&format!(" commit:{}", commit));
595    }
596    if let Some(diff_hash) = &metadata.diff_hash {
597        meta_line.push_str(&format!(" diff_hash:{}", diff_hash));
598    }
599    output.push_str(&meta_line);
600    output.push('\n');
601
602    output
603}
604
605impl CodeMapMapping {
606    pub fn resolve_symbols(&mut self, root: &Path) {
607        let mut file_cache: HashMap<String, String> = HashMap::new();
608
609        for location in self.nodes.values_mut() {
610            // If we already have line numbers, we might want to verify them or just keep them.
611            // But if we have a symbol and no lines (or we want to refresh), we resolve.
612            // For now, let's prioritize the symbol if present.
613            if let Some(symbol) = &location.symbol {
614                if !file_cache.contains_key(&location.file) {
615                    let file_path = root.join(&location.file);
616                    if file_path.exists() {
617                        if let Ok(content) = fs::read_to_string(&file_path) {
618                            file_cache.insert(location.file.clone(), content);
619                        }
620                    }
621                }
622
623                if let Some(content) = file_cache.get(&location.file) {
624                    if let Some((start, end)) =
625                        find_symbol_definition(content, symbol, &location.file)
626                    {
627                        location.start_line = Some(start);
628                        location.end_line = Some(end);
629                    }
630                }
631            }
632        }
633    }
634}
635
636fn find_symbol_definition(content: &str, symbol: &str, file_path: &str) -> Option<(usize, usize)> {
637    let ext = Path::new(file_path)
638        .extension()
639        .and_then(|s| s.to_str())
640        .unwrap_or("");
641
642    // Simple regex-based finder for now.
643    // This is not perfect but covers many cases without heavy dependencies.
644
645    let patterns = match ext {
646        "rs" => vec![
647            format!(r"fn\s+{}\b", regex::escape(symbol)),
648            format!(r"struct\s+{}\b", regex::escape(symbol)),
649            format!(r"enum\s+{}\b", regex::escape(symbol)),
650            format!(r"trait\s+{}\b", regex::escape(symbol)),
651            format!(r"mod\s+{}\b", regex::escape(symbol)),
652            format!(r"type\s+{}\b", regex::escape(symbol)),
653            format!(r"const\s+{}\b", regex::escape(symbol)),
654        ],
655        "ts" | "tsx" | "js" | "jsx" => vec![
656            format!(r"function\s+{}\b", regex::escape(symbol)),
657            format!(r"class\s+{}\b", regex::escape(symbol)),
658            format!(r"interface\s+{}\b", regex::escape(symbol)),
659            format!(r"type\s+{}\b", regex::escape(symbol)),
660            format!(r"const\s+{}\s*=", regex::escape(symbol)),
661            format!(r"let\s+{}\s*=", regex::escape(symbol)),
662            format!(r"var\s+{}\s*=", regex::escape(symbol)),
663        ],
664        "py" => vec![
665            format!(r"def\s+{}\b", regex::escape(symbol)),
666            format!(r"class\s+{}\b", regex::escape(symbol)),
667        ],
668        "go" => vec![
669            format!(r"func\s+{}\b", regex::escape(symbol)),
670            format!(r"type\s+{}\b", regex::escape(symbol)),
671        ],
672        _ => vec![
673            format!(r"{}\b", regex::escape(symbol)), // Fallback: just the name
674        ],
675    };
676
677    for pattern in patterns {
678        if let Ok(re) = regex::Regex::new(&pattern) {
679            if let Some(mat) = re.find(content) {
680                // Found the start. Now try to estimate the end.
681                // This is hard without a parser.
682                // For now, let's just return the line where it starts, and maybe 10 lines after?
683                // Or just the single line if we can't determine scope.
684
685                let start_byte = mat.start();
686                let start_line = content[..start_byte].lines().count() + 1;
687
688                // Heuristic for end line: count braces?
689                // This is very rough.
690                let end_line = estimate_block_end(content, start_byte)
691                    .map(|l| l + 1)
692                    .unwrap_or(start_line);
693
694                return Some((start_line, end_line));
695            }
696        }
697    }
698
699    None
700}
701
/// Estimates the line on which the brace-delimited block starting at `start_byte` ends.
///
/// Characters are scanned from `start_byte`, tracking `{`/`}` nesting and counting newlines.
/// The result is expressed relative to a base of `content[..start_byte].lines().count()`
/// (note that this counts a partial line before `start_byte` as a whole line):
///
/// * `Some(base + newlines_scanned)` as soon as at least one `{` has been seen and the
///   nesting depth is back at zero;
/// * `Some(base + 5)` when no `{` was encountered at all (e.g. Python), including when the
///   scan gives up after more than 500 newlines;
/// * `None` when a `{` was seen but the block never closed within the scanned range.
///
/// Braces inside strings or comments are counted like any other brace.
fn estimate_block_end(content: &str, start_byte: usize) -> Option<usize> {
    // Safety limit for very long blocks or missing braces.
    const MAX_SCANNED_NEWLINES: usize = 500;
    // Number of lines assumed for brace-less definitions.
    const BRACELESS_SPAN: usize = 5;

    let base_line = content[..start_byte].lines().count();
    let mut depth = 0;
    let mut saw_open = false;
    let mut newlines = 0;

    for ch in content[start_byte..].chars() {
        match ch {
            '{' => {
                depth += 1;
                saw_open = true;
            }
            '}' => depth -= 1,
            '\n' => newlines += 1,
            _ => {}
        }

        if saw_open && depth == 0 {
            return Some(base_line + newlines);
        }

        if newlines > MAX_SCANNED_NEWLINES {
            break;
        }
    }

    if saw_open {
        None
    } else {
        Some(base_line + BRACELESS_SPAN)
    }
}
738
739fn generate_deterministic_map(
740    root_path: &Path,
741    max_nodes: usize,
742) -> Result<(String, CodeMapMapping)> {
743    let mut nodes = HashMap::new();
744    let mut edges = Vec::new();
745    let mut symbol_to_node_id = HashMap::new();
746
747    // 1. Scan files and find definitions
748    let walker = WalkDir::new(root_path).into_iter();
749    let include_exts = vec!["rs", "ts", "tsx", "js", "jsx", "py", "go"];
750    let ignore_dirs = vec![
751        "target",
752        "node_modules",
753        ".git",
754        "dist",
755        "build",
756        ".next",
757        "out",
758    ];
759
760    let mut files_content = HashMap::new();
761
762    'outer: for entry in walker.filter_entry(|e| {
763        let file_name = e.file_name().to_string_lossy();
764        !ignore_dirs.iter().any(|d| file_name == *d)
765    }) {
766        let entry = entry?;
767        let path = entry.path();
768        if path.is_dir() {
769            continue;
770        }
771
772        if let Some(ext) = path.extension().and_then(|s| s.to_str()) {
773            if include_exts.contains(&ext) {
774                if let Ok(content) = fs::read_to_string(path) {
775                    let rel_path = if root_path.is_file() {
776                        path.file_name()
777                            .unwrap_or_default()
778                            .to_string_lossy()
779                            .to_string()
780                    } else {
781                        path.strip_prefix(root_path)
782                            .unwrap_or(path)
783                            .to_string_lossy()
784                            .to_string()
785                    };
786                    files_content.insert(rel_path.clone(), (content.clone(), ext.to_string()));
787
788                    let defs = find_all_definitions(&content, ext);
789                    for (symbol, start, end) in defs {
790                        if nodes.len() >= max_nodes {
791                            println!(
792                                "Warning: Hit node limit ({}). Stopping scan to prevent huge diagrams.",
793                                max_nodes
794                            );
795                            break 'outer;
796                        }
797
798                        let node_id = format!("node_{}", nodes.len());
799                        nodes.insert(
800                            node_id.clone(),
801                            CodeLocation {
802                                file: rel_path.clone(),
803                                start_line: Some(start),
804                                end_line: Some(end),
805                                symbol: Some(symbol.clone()),
806                            },
807                        );
808                        symbol_to_node_id.insert(symbol, node_id);
809                    }
810                }
811            }
812        }
813    }
814
815    // 2. Scan bodies for calls
816    for (node_id, location) in &nodes {
817        if location.symbol.is_some() {
818            if let Some((content, _)) = files_content.get(&location.file) {
819                let start_line = location.start_line.unwrap_or(0);
820                let end_line = location.end_line.unwrap_or(content.lines().count());
821
822                // Extract body content (approximate)
823                let take_count = if end_line >= start_line {
824                    end_line - start_line + 1
825                } else {
826                    0
827                };
828
829                let body: String = content
830                    .lines()
831                    .skip(start_line.saturating_sub(1))
832                    .take(take_count)
833                    .collect::<Vec<&str>>()
834                    .join("\n");
835
836                for (target_symbol, target_id) in &symbol_to_node_id {
837                    if target_id == node_id {
838                        continue;
839                    } // Don't link to self
840
841                    // Check if body contains target_symbol
842                    if body.contains(target_symbol) {
843                        // Verify with regex for word boundary
844                        if let Ok(re) =
845                            regex::Regex::new(&format!(r"\b{}\b", regex::escape(target_symbol)))
846                        {
847                            if re.is_match(&body) {
848                                edges.push((node_id.clone(), target_id.clone()));
849                            }
850                        }
851                    }
852                }
853            }
854        }
855    }
856
857    // 3. Generate Mermaid
858    let mut mermaid = String::from("graph TD\n");
859    for (id, location) in &nodes {
860        let label = location.symbol.as_deref().unwrap_or("?");
861        // Sanitize label for Mermaid
862        let safe_label = label.replace("\"", "'").replace("[", "(").replace("]", ")");
863        mermaid.push_str(&format!("    {}[{}]\n", id, safe_label));
864    }
865
866    // Deduplicate edges
867    edges.sort();
868    edges.dedup();
869
870    for (from, to) in edges {
871        mermaid.push_str(&format!("    {} --> {}\n", from, to));
872    }
873
874    Ok((mermaid, CodeMapMapping { nodes }))
875}
876
877fn find_all_definitions(content: &str, ext: &str) -> Vec<(String, usize, usize)> {
878    let mut defs = Vec::new();
879
880    let patterns = match ext {
881        "rs" => vec![
882            r"fn\s+(\w+)",
883            r"struct\s+(\w+)",
884            r"enum\s+(\w+)",
885            r"trait\s+(\w+)",
886            r"mod\s+(\w+)",
887        ],
888        "ts" | "tsx" | "js" | "jsx" => vec![
889            r"function\s+(\w+)",
890            r"class\s+(\w+)",
891            r"interface\s+(\w+)",
892            r"const\s+(\w+)\s*=",
893            r"let\s+(\w+)\s*=",
894        ],
895        "py" => vec![r"def\s+(\w+)", r"class\s+(\w+)"],
896        "go" => vec![r"func\s+(\w+)", r"type\s+(\w+)"],
897        _ => vec![],
898    };
899
900    for pattern in patterns {
901        if let Ok(re) = regex::Regex::new(pattern) {
902            for cap in re.captures_iter(content) {
903                if let Some(m) = cap.get(1) {
904                    let symbol = m.as_str().to_string();
905                    let start_byte = m.start();
906                    let start_line = content[..start_byte].lines().count() + 1; // 1-based
907                    let end_line = estimate_block_end(content, start_byte)
908                        .map(|l| l + 1)
909                        .unwrap_or(start_line);
910                    defs.push((symbol, start_line, end_line));
911                }
912            }
913        }
914    }
915
916    defs
917}