Skip to main content

cortex_runtime/cli/
map_cmd.rs

//! `cortex map <domain>` — map a website into a navigable graph.
2
use crate::cli::output::{self, Styled};
use crate::intelligence::cache::MapCache;
use crate::map::types::SiteMap;
use anyhow::{Context, Result};
use std::collections::{BTreeMap, HashMap};
use std::time::Instant;
9
10/// Run the map command.
11pub async fn run(
12    domain: &str,
13    max_nodes: u32,
14    max_render: u32,
15    timeout: u64,
16    fresh: bool,
17) -> Result<()> {
18    let s = Styled::new();
19    let start = Instant::now();
20
21    // Check for cached map first (unless --fresh)
22    if !fresh {
23        let mut cache = MapCache::default_cache()?;
24        if let Some(path) = cache.get(domain) {
25            let data = std::fs::read(path)?;
26            let map = SiteMap::deserialize(&data).context("failed to load cached map")?;
27
28            if output::is_json() {
29                print_map_json(&map, start.elapsed());
30                return Ok(());
31            }
32
33            if !output::is_quiet() {
34                let age = path
35                    .metadata()
36                    .ok()
37                    .and_then(|m| m.modified().ok())
38                    .and_then(|t| t.elapsed().ok())
39                    .map(|d| output::format_duration(d.as_secs()))
40                    .unwrap_or_else(|| "unknown".to_string());
41                eprintln!("  Using cached map ({age} old). Use --fresh to re-map.");
42                eprintln!();
43            }
44
45            print_map_stats(&s, &map, start.elapsed());
46            return Ok(());
47        }
48    }
49
50    // First-run auto-setup: install Chromium and start daemon if needed
51    let show_progress = !output::is_quiet() && !output::is_json();
52
53    if show_progress {
54        eprintln!();
55        eprintln!("  {} Mapping {domain}", s.bold("CORTEX —"));
56        eprintln!();
57    }
58
59    let needs_setup = auto_setup_if_needed().await?;
60    if needs_setup && show_progress {
61        eprintln!();
62    }
63
64    // Connect to the daemon socket and send a MAP request
65    use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
66    use tokio::net::UnixStream;
67
68    let socket_path = "/tmp/cortex.sock";
69    let mut stream = match UnixStream::connect(socket_path).await {
70        Ok(s) => s,
71        Err(_) => {
72            if output::is_json() {
73                output::print_json(&serde_json::json!({
74                    "status": "daemon_required",
75                    "message": "Cannot connect to Cortex daemon",
76                    "hint": "Start with: cortex start"
77                }));
78            } else if !output::is_quiet() {
79                eprintln!("  Cannot connect to Cortex daemon.");
80                eprintln!("  Start the daemon with: cortex start");
81            }
82            return Ok(());
83        }
84    };
85
86    // Spawn a background SSE listener for live progress (best effort)
87    let sse_domain = domain.to_string();
88    let sse_handle = if show_progress {
89        Some(tokio::spawn(stream_progress_from_sse(sse_domain)))
90    } else {
91        None
92    };
93
94    let req = serde_json::json!({
95        "id": format!("map-{}", std::process::id()),
96        "method": "map",
97        "params": {
98            "domain": domain,
99            "max_nodes": max_nodes,
100            "max_render": max_render,
101            "max_time_ms": timeout,
102            "respect_robots": true,
103        }
104    });
105    let req_str = format!("{}\n", req);
106    stream
107        .write_all(req_str.as_bytes())
108        .await
109        .context("failed to send MAP request")?;
110
111    // Read response (with generous timeout for mapping)
112    let (reader, _writer) = stream.into_split();
113    let mut reader = BufReader::new(reader);
114    let mut line = String::new();
115    let response_timeout = std::time::Duration::from_millis(timeout + 30000);
116    let read_result = tokio::time::timeout(response_timeout, reader.read_line(&mut line)).await;
117
118    // Cancel the SSE listener
119    if let Some(handle) = sse_handle {
120        handle.abort();
121    }
122
123    match read_result {
124        Ok(Ok(n)) if n > 0 => {} // Data received into `line`
125        Ok(Ok(_)) => {
126            if !output::is_quiet() {
127                eprintln!("  Connection closed by server.");
128            }
129            return Ok(());
130        }
131        Ok(Err(e)) => {
132            if !output::is_quiet() {
133                eprintln!("  Read error: {e}");
134            }
135            return Ok(());
136        }
137        Err(_) => {
138            if !output::is_quiet() {
139                eprintln!("  Mapping timed out after {}ms.", timeout + 30000);
140            }
141            return Ok(());
142        }
143    };
144
145    let response: serde_json::Value =
146        serde_json::from_str(line.trim()).context("failed to parse response")?;
147
148    if let Some(error) = response.get("error") {
149        if output::is_json() {
150            output::print_json(&response);
151        } else if !output::is_quiet() {
152            let msg = error
153                .get("message")
154                .and_then(|v| v.as_str())
155                .unwrap_or("unknown error");
156            eprintln!("  Mapping failed: {msg}");
157        }
158        return Ok(());
159    }
160
161    let result = response.get("result").cloned().unwrap_or_default();
162    let node_count = result
163        .get("node_count")
164        .and_then(|v| v.as_u64())
165        .unwrap_or(0);
166    let edge_count = result
167        .get("edge_count")
168        .and_then(|v| v.as_u64())
169        .unwrap_or(0);
170
171    if output::is_json() {
172        output::print_json(&result);
173        return Ok(());
174    }
175
176    if show_progress {
177        let elapsed = start.elapsed();
178        eprintln!();
179        eprintln!("  {} Mapped {domain}", s.ok_sym());
180        eprintln!(
181            "    {} nodes  ·  {} edges  ·  {:.1}s",
182            format_count(node_count),
183            format_count(edge_count),
184            elapsed.as_secs_f64()
185        );
186        eprintln!();
187        eprintln!("  Query with: cortex query {domain} --type product_detail");
188
189        // Show dashboard hint if HTTP is available
190        eprintln!("  Dashboard:  http://localhost:7700/dashboard");
191    }
192
193    // Cache the map binary if available
194    if let Some(map_path) = result.get("map_path").and_then(|v| v.as_str()) {
195        if show_progress {
196            eprintln!("  Cached at:  {map_path}");
197        }
198    }
199
200    Ok(())
201}
202
203/// Print map stats in branded format.
204fn print_map_stats(s: &Styled, map: &SiteMap, elapsed: std::time::Duration) {
205    let rendered = map.nodes.iter().filter(|n| n.flags.is_rendered()).count();
206    let estimated = map.nodes.len() - rendered;
207
208    // Count page types
209    let mut type_counts: HashMap<String, usize> = HashMap::new();
210    for node in &map.nodes {
211        let name = format!("{:?}", node.page_type).to_lowercase();
212        *type_counts.entry(name).or_default() += 1;
213    }
214    let mut type_vec: Vec<(String, usize)> = type_counts.into_iter().collect();
215    type_vec.sort_by(|a, b| b.1.cmp(&a.1));
216
217    let total_nodes = map.nodes.len();
218
219    eprintln!("  Map complete in {:.1}s", elapsed.as_secs_f64());
220    eprintln!();
221    eprintln!("  {}", s.bold(&format!("{:<45}", map.header.domain)));
222    eprintln!(
223        "  Nodes:     {} ({} rendered, {} estimated)",
224        total_nodes, rendered, estimated
225    );
226    eprintln!("  Edges:     {}", map.edges.len());
227    if !map.cluster_centroids.is_empty() {
228        eprintln!("  Clusters:  {}", map.cluster_centroids.len());
229    }
230    eprintln!("  Actions:   {}", map.actions.len());
231    eprintln!();
232
233    if !type_vec.is_empty() {
234        eprintln!("  Top page types:");
235        for (name, count) in type_vec.iter().take(5) {
236            let pct = if total_nodes > 0 {
237                (count * 100) / total_nodes
238            } else {
239                0
240            };
241            eprintln!("    {:<20} {:>6}  ({pct}%)", name, count);
242        }
243    }
244
245    eprintln!();
246    eprintln!(
247        "  Query with: cortex query {} --type product_detail",
248        map.header.domain
249    );
250}
251
252/// Print map stats as JSON.
253fn print_map_json(map: &SiteMap, elapsed: std::time::Duration) {
254    let rendered = map.nodes.iter().filter(|n| n.flags.is_rendered()).count();
255
256    let mut type_counts: HashMap<String, usize> = HashMap::new();
257    for node in &map.nodes {
258        let name = format!("{:?}", node.page_type).to_lowercase();
259        *type_counts.entry(name).or_default() += 1;
260    }
261
262    output::print_json(&serde_json::json!({
263        "domain": map.header.domain,
264        "nodes": map.nodes.len(),
265        "edges": map.edges.len(),
266        "rendered": rendered,
267        "clusters": map.cluster_centroids.len(),
268        "actions": map.actions.len(),
269        "page_types": type_counts,
270        "duration_ms": elapsed.as_millis(),
271    }));
272}
273
274/// Auto-install Chromium and auto-start daemon if needed.
275///
276/// Returns `true` if any setup action was taken, `false` if already ready.
277///
278/// This implements the "first-run experience" from Section 6 of the edge cases doc:
279/// `cortex map` should auto-install and auto-start so users never need to run
280/// `cortex install` and `cortex start` separately on first use.
281async fn auto_setup_if_needed() -> Result<bool> {
282    let mut did_something = false;
283
284    // Check if Chromium is installed
285    let chromium_path = crate::cli::doctor::find_chromium();
286    if chromium_path.is_none() {
287        if !output::is_quiet() {
288            let s = Styled::new();
289            eprintln!(
290                "  {} Cortex is not set up yet. Let's get you started:",
291                s.info_sym()
292            );
293            eprintln!();
294            eprintln!("  [1/2] Installing Chromium...");
295        }
296        // Try to install
297        match crate::cli::install_cmd::run_with_force(false).await {
298            Ok(()) => {
299                did_something = true;
300            }
301            Err(e) => {
302                // Installation failed — give clear instructions
303                if !output::is_quiet() {
304                    eprintln!("    Failed to auto-install Chromium: {e}");
305                    eprintln!("    Run 'cortex install' manually for detailed output.");
306                }
307                return Err(e);
308            }
309        }
310    }
311
312    // Check if daemon is running
313    let socket_path = std::path::PathBuf::from("/tmp/cortex.sock");
314    if !socket_path.exists() {
315        if !output::is_quiet() {
316            if did_something {
317                eprintln!("  [2/2] Starting Cortex process...");
318            } else {
319                let s = Styled::new();
320                eprintln!("  {} Cortex is not running. Starting...", s.info_sym());
321            }
322        }
323        match crate::cli::start::run().await {
324            Ok(()) => {
325                did_something = true;
326                // Brief pause for the daemon to initialize
327                tokio::time::sleep(std::time::Duration::from_millis(300)).await;
328            }
329            Err(e) => {
330                if !output::is_quiet() {
331                    eprintln!("    Failed to auto-start: {e}");
332                    eprintln!("    Run 'cortex start' manually for details.");
333                }
334                return Err(e);
335            }
336        }
337    }
338
339    Ok(did_something)
340}
341
/// Format a number with comma thousands separators for display.
///
/// `999` stays `"999"`, `1000` becomes `"1,000"`, `1234567` becomes
/// `"1,234,567"`.
fn format_count(n: u64) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut out = String::with_capacity(len + len / 3);

    for (i, ch) in digits.chars().enumerate() {
        // A comma goes before every digit that has a multiple of three
        // digits remaining after it, except at the very start.
        if i > 0 && (len - i) % 3 == 0 {
            out.push(',');
        }
        out.push(ch);
    }
    out
}
357
358/// Subscribe to SSE events and print live mapping progress.
359///
360/// Best-effort: if the REST API is not running, this silently returns.
361/// Runs concurrently with the socket MAP request. Uses a raw TCP
362/// connection to avoid additional crate dependencies for streaming.
363async fn stream_progress_from_sse(domain: String) {
364    use tokio::io::{AsyncBufReadExt, AsyncWriteExt};
365    use tokio::net::TcpStream;
366
367    // Connect to the REST API
368    let mut stream = match TcpStream::connect("127.0.0.1:7700").await {
369        Ok(s) => s,
370        Err(_) => return, // REST API not available — skip live progress
371    };
372
373    // Send HTTP GET request for SSE
374    let request = format!(
375        "GET /api/v1/events?domain={domain} HTTP/1.1\r\n\
376         Host: 127.0.0.1:7700\r\n\
377         Accept: text/event-stream\r\n\
378         Connection: keep-alive\r\n\
379         \r\n"
380    );
381    if stream.write_all(request.as_bytes()).await.is_err() {
382        return;
383    }
384
385    let s = Styled::new();
386    let reader = tokio::io::BufReader::new(stream);
387    let mut lines = reader.lines();
388
389    // Skip HTTP response headers
390    let mut past_headers = false;
391    while let Ok(Some(line)) = lines.next_line().await {
392        if line.is_empty() {
393            past_headers = true;
394            break;
395        }
396    }
397    if !past_headers {
398        return;
399    }
400
401    // Read SSE data lines
402    while let Ok(Some(line)) = lines.next_line().await {
403        let trimmed = line.trim();
404        if !trimmed.starts_with("data:") {
405            continue;
406        }
407        let json_str = trimmed.trim_start_matches("data:").trim();
408        if json_str.is_empty() {
409            continue;
410        }
411
412        let event: serde_json::Value = match serde_json::from_str(json_str) {
413            Ok(v) => v,
414            Err(_) => continue,
415        };
416
417        let event_type = event.get("type").and_then(|v| v.as_str()).unwrap_or("");
418        match event_type {
419            "SitemapDiscovered" => {
420                let count = event.get("url_count").and_then(|v| v.as_u64()).unwrap_or(0);
421                let ms = event
422                    .get("elapsed_ms")
423                    .and_then(|v| v.as_u64())
424                    .unwrap_or(0);
425                eprintln!(
426                    "  Layer 0  {:<22} {} URLs discovered {:>20} {}",
427                    "Metadata",
428                    format_count(count),
429                    format!("{:.1}s", ms as f64 / 1000.0),
430                    s.ok_sym(),
431                );
432            }
433            "StructuredDataExtracted" => {
434                let pages = event
435                    .get("pages_fetched")
436                    .and_then(|v| v.as_u64())
437                    .unwrap_or(0);
438                let jsonld = event
439                    .get("jsonld_found")
440                    .and_then(|v| v.as_u64())
441                    .unwrap_or(0);
442                let patterns = event
443                    .get("patterns_used")
444                    .and_then(|v| v.as_u64())
445                    .unwrap_or(0);
446                let ms = event
447                    .get("elapsed_ms")
448                    .and_then(|v| v.as_u64())
449                    .unwrap_or(0);
450                let pct = if pages > 0 { (jsonld * 100) / pages } else { 0 };
451                eprintln!(
452                    "  Layer 1  {:<22} {} pages, {} JSON-LD ({}%) {:>15} {}",
453                    "Structured Data",
454                    pages,
455                    jsonld,
456                    pct,
457                    format!("{:.1}s", ms as f64 / 1000.0),
458                    s.ok_sym(),
459                );
460                if patterns > 0 {
461                    eprintln!(
462                        "  Layer 1½ {:<22} {} pages enriched via CSS selectors",
463                        "Pattern Engine", patterns,
464                    );
465                }
466            }
467            "LayerComplete" => {
468                let layer = event.get("layer").and_then(|v| v.as_u64()).unwrap_or(0);
469                let name = event
470                    .get("layer_name")
471                    .and_then(|v| v.as_str())
472                    .unwrap_or("?");
473                let ms = event
474                    .get("elapsed_ms")
475                    .and_then(|v| v.as_u64())
476                    .unwrap_or(0);
477                eprintln!(
478                    "  Layer {}  {:<22} {:>36} {}",
479                    layer,
480                    name,
481                    format!("{:.1}s", ms as f64 / 1000.0),
482                    s.ok_sym(),
483                );
484            }
485            "MapComplete" | "MapFailed" => {
486                // Stop streaming — the final summary comes from the socket response
487                break;
488            }
489            _ => {} // Skip other event types
490        }
491    }
492}