1use crate::cli::output::{self, Styled};
4use crate::intelligence::cache::MapCache;
5use crate::map::types::SiteMap;
6use anyhow::{Context, Result};
7use std::collections::HashMap;
8use std::time::Instant;
9
10pub async fn run(
12 domain: &str,
13 max_nodes: u32,
14 max_render: u32,
15 timeout: u64,
16 fresh: bool,
17) -> Result<()> {
18 let s = Styled::new();
19 let start = Instant::now();
20
21 if !fresh {
23 let mut cache = MapCache::default_cache()?;
24 if let Some(path) = cache.get(domain) {
25 let data = std::fs::read(path)?;
26 let map = SiteMap::deserialize(&data).context("failed to load cached map")?;
27
28 if output::is_json() {
29 print_map_json(&map, start.elapsed());
30 return Ok(());
31 }
32
33 if !output::is_quiet() {
34 let age = path
35 .metadata()
36 .ok()
37 .and_then(|m| m.modified().ok())
38 .and_then(|t| t.elapsed().ok())
39 .map(|d| output::format_duration(d.as_secs()))
40 .unwrap_or_else(|| "unknown".to_string());
41 eprintln!(" Using cached map ({age} old). Use --fresh to re-map.");
42 eprintln!();
43 }
44
45 print_map_stats(&s, &map, start.elapsed());
46 return Ok(());
47 }
48 }
49
50 let show_progress = !output::is_quiet() && !output::is_json();
52
53 if show_progress {
54 eprintln!();
55 eprintln!(" {} Mapping {domain}", s.bold("CORTEX —"));
56 eprintln!();
57 }
58
59 let needs_setup = auto_setup_if_needed().await?;
60 if needs_setup && show_progress {
61 eprintln!();
62 }
63
64 use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
66 use tokio::net::UnixStream;
67
68 let socket_path = "/tmp/cortex.sock";
69 let mut stream = match UnixStream::connect(socket_path).await {
70 Ok(s) => s,
71 Err(_) => {
72 if output::is_json() {
73 output::print_json(&serde_json::json!({
74 "status": "daemon_required",
75 "message": "Cannot connect to Cortex daemon",
76 "hint": "Start with: cortex start"
77 }));
78 } else if !output::is_quiet() {
79 eprintln!(" Cannot connect to Cortex daemon.");
80 eprintln!(" Start the daemon with: cortex start");
81 }
82 return Ok(());
83 }
84 };
85
86 let sse_domain = domain.to_string();
88 let sse_handle = if show_progress {
89 Some(tokio::spawn(stream_progress_from_sse(sse_domain)))
90 } else {
91 None
92 };
93
94 let req = serde_json::json!({
95 "id": format!("map-{}", std::process::id()),
96 "method": "map",
97 "params": {
98 "domain": domain,
99 "max_nodes": max_nodes,
100 "max_render": max_render,
101 "max_time_ms": timeout,
102 "respect_robots": true,
103 }
104 });
105 let req_str = format!("{}\n", req);
106 stream
107 .write_all(req_str.as_bytes())
108 .await
109 .context("failed to send MAP request")?;
110
111 let (reader, _writer) = stream.into_split();
113 let mut reader = BufReader::new(reader);
114 let mut line = String::new();
115 let response_timeout = std::time::Duration::from_millis(timeout + 30000);
116 let read_result = tokio::time::timeout(response_timeout, reader.read_line(&mut line)).await;
117
118 if let Some(handle) = sse_handle {
120 handle.abort();
121 }
122
123 match read_result {
124 Ok(Ok(n)) if n > 0 => {} Ok(Ok(_)) => {
126 if !output::is_quiet() {
127 eprintln!(" Connection closed by server.");
128 }
129 return Ok(());
130 }
131 Ok(Err(e)) => {
132 if !output::is_quiet() {
133 eprintln!(" Read error: {e}");
134 }
135 return Ok(());
136 }
137 Err(_) => {
138 if !output::is_quiet() {
139 eprintln!(" Mapping timed out after {}ms.", timeout + 30000);
140 }
141 return Ok(());
142 }
143 };
144
145 let response: serde_json::Value =
146 serde_json::from_str(line.trim()).context("failed to parse response")?;
147
148 if let Some(error) = response.get("error") {
149 if output::is_json() {
150 output::print_json(&response);
151 } else if !output::is_quiet() {
152 let msg = error
153 .get("message")
154 .and_then(|v| v.as_str())
155 .unwrap_or("unknown error");
156 eprintln!(" Mapping failed: {msg}");
157 }
158 return Ok(());
159 }
160
161 let result = response.get("result").cloned().unwrap_or_default();
162 let node_count = result
163 .get("node_count")
164 .and_then(|v| v.as_u64())
165 .unwrap_or(0);
166 let edge_count = result
167 .get("edge_count")
168 .and_then(|v| v.as_u64())
169 .unwrap_or(0);
170
171 if output::is_json() {
172 output::print_json(&result);
173 return Ok(());
174 }
175
176 if show_progress {
177 let elapsed = start.elapsed();
178 eprintln!();
179 eprintln!(" {} Mapped {domain}", s.ok_sym());
180 eprintln!(
181 " {} nodes · {} edges · {:.1}s",
182 format_count(node_count),
183 format_count(edge_count),
184 elapsed.as_secs_f64()
185 );
186 eprintln!();
187 eprintln!(" Query with: cortex query {domain} --type product_detail");
188
189 eprintln!(" Dashboard: http://localhost:7700/dashboard");
191 }
192
193 if let Some(map_path) = result.get("map_path").and_then(|v| v.as_str()) {
195 if show_progress {
196 eprintln!(" Cached at: {map_path}");
197 }
198 }
199
200 Ok(())
201}
202
203fn print_map_stats(s: &Styled, map: &SiteMap, elapsed: std::time::Duration) {
205 let rendered = map.nodes.iter().filter(|n| n.flags.is_rendered()).count();
206 let estimated = map.nodes.len() - rendered;
207
208 let mut type_counts: HashMap<String, usize> = HashMap::new();
210 for node in &map.nodes {
211 let name = format!("{:?}", node.page_type).to_lowercase();
212 *type_counts.entry(name).or_default() += 1;
213 }
214 let mut type_vec: Vec<(String, usize)> = type_counts.into_iter().collect();
215 type_vec.sort_by(|a, b| b.1.cmp(&a.1));
216
217 let total_nodes = map.nodes.len();
218
219 eprintln!(" Map complete in {:.1}s", elapsed.as_secs_f64());
220 eprintln!();
221 eprintln!(" {}", s.bold(&format!("{:<45}", map.header.domain)));
222 eprintln!(
223 " Nodes: {} ({} rendered, {} estimated)",
224 total_nodes, rendered, estimated
225 );
226 eprintln!(" Edges: {}", map.edges.len());
227 if !map.cluster_centroids.is_empty() {
228 eprintln!(" Clusters: {}", map.cluster_centroids.len());
229 }
230 eprintln!(" Actions: {}", map.actions.len());
231 eprintln!();
232
233 if !type_vec.is_empty() {
234 eprintln!(" Top page types:");
235 for (name, count) in type_vec.iter().take(5) {
236 let pct = if total_nodes > 0 {
237 (count * 100) / total_nodes
238 } else {
239 0
240 };
241 eprintln!(" {:<20} {:>6} ({pct}%)", name, count);
242 }
243 }
244
245 eprintln!();
246 eprintln!(
247 " Query with: cortex query {} --type product_detail",
248 map.header.domain
249 );
250}
251
252fn print_map_json(map: &SiteMap, elapsed: std::time::Duration) {
254 let rendered = map.nodes.iter().filter(|n| n.flags.is_rendered()).count();
255
256 let mut type_counts: HashMap<String, usize> = HashMap::new();
257 for node in &map.nodes {
258 let name = format!("{:?}", node.page_type).to_lowercase();
259 *type_counts.entry(name).or_default() += 1;
260 }
261
262 output::print_json(&serde_json::json!({
263 "domain": map.header.domain,
264 "nodes": map.nodes.len(),
265 "edges": map.edges.len(),
266 "rendered": rendered,
267 "clusters": map.cluster_centroids.len(),
268 "actions": map.actions.len(),
269 "page_types": type_counts,
270 "duration_ms": elapsed.as_millis(),
271 }));
272}
273
274async fn auto_setup_if_needed() -> Result<bool> {
282 let mut did_something = false;
283
284 let chromium_path = crate::cli::doctor::find_chromium();
286 if chromium_path.is_none() {
287 if !output::is_quiet() {
288 let s = Styled::new();
289 eprintln!(
290 " {} Cortex is not set up yet. Let's get you started:",
291 s.info_sym()
292 );
293 eprintln!();
294 eprintln!(" [1/2] Installing Chromium...");
295 }
296 match crate::cli::install_cmd::run_with_force(false).await {
298 Ok(()) => {
299 did_something = true;
300 }
301 Err(e) => {
302 if !output::is_quiet() {
304 eprintln!(" Failed to auto-install Chromium: {e}");
305 eprintln!(" Run 'cortex install' manually for detailed output.");
306 }
307 return Err(e);
308 }
309 }
310 }
311
312 let socket_path = std::path::PathBuf::from("/tmp/cortex.sock");
314 if !socket_path.exists() {
315 if !output::is_quiet() {
316 if did_something {
317 eprintln!(" [2/2] Starting Cortex process...");
318 } else {
319 let s = Styled::new();
320 eprintln!(" {} Cortex is not running. Starting...", s.info_sym());
321 }
322 }
323 match crate::cli::start::run().await {
324 Ok(()) => {
325 did_something = true;
326 tokio::time::sleep(std::time::Duration::from_millis(300)).await;
328 }
329 Err(e) => {
330 if !output::is_quiet() {
331 eprintln!(" Failed to auto-start: {e}");
332 eprintln!(" Run 'cortex start' manually for details.");
333 }
334 return Err(e);
335 }
336 }
337 }
338
339 Ok(did_something)
340}
341
/// Render `n` in decimal with a comma between every group of three digits.
///
/// Values below one thousand come back without any separator, e.g. `999` -> `"999"`,
/// while `1234567` -> `"1,234,567"`.
fn format_count(n: u64) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut grouped = String::with_capacity(len + (len - 1) / 3);

    for (idx, digit) in digits.chars().enumerate() {
        // A separator goes in front of every digit that starts a full trailing group of three.
        let remaining = len - idx;
        if idx != 0 && remaining % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }

    grouped
}
357
358async fn stream_progress_from_sse(domain: String) {
364 use tokio::io::{AsyncBufReadExt, AsyncWriteExt};
365 use tokio::net::TcpStream;
366
367 let mut stream = match TcpStream::connect("127.0.0.1:7700").await {
369 Ok(s) => s,
370 Err(_) => return, };
372
373 let request = format!(
375 "GET /api/v1/events?domain={domain} HTTP/1.1\r\n\
376 Host: 127.0.0.1:7700\r\n\
377 Accept: text/event-stream\r\n\
378 Connection: keep-alive\r\n\
379 \r\n"
380 );
381 if stream.write_all(request.as_bytes()).await.is_err() {
382 return;
383 }
384
385 let s = Styled::new();
386 let reader = tokio::io::BufReader::new(stream);
387 let mut lines = reader.lines();
388
389 let mut past_headers = false;
391 while let Ok(Some(line)) = lines.next_line().await {
392 if line.is_empty() {
393 past_headers = true;
394 break;
395 }
396 }
397 if !past_headers {
398 return;
399 }
400
401 while let Ok(Some(line)) = lines.next_line().await {
403 let trimmed = line.trim();
404 if !trimmed.starts_with("data:") {
405 continue;
406 }
407 let json_str = trimmed.trim_start_matches("data:").trim();
408 if json_str.is_empty() {
409 continue;
410 }
411
412 let event: serde_json::Value = match serde_json::from_str(json_str) {
413 Ok(v) => v,
414 Err(_) => continue,
415 };
416
417 let event_type = event.get("type").and_then(|v| v.as_str()).unwrap_or("");
418 match event_type {
419 "SitemapDiscovered" => {
420 let count = event.get("url_count").and_then(|v| v.as_u64()).unwrap_or(0);
421 let ms = event
422 .get("elapsed_ms")
423 .and_then(|v| v.as_u64())
424 .unwrap_or(0);
425 eprintln!(
426 " Layer 0 {:<22} {} URLs discovered {:>20} {}",
427 "Metadata",
428 format_count(count),
429 format!("{:.1}s", ms as f64 / 1000.0),
430 s.ok_sym(),
431 );
432 }
433 "StructuredDataExtracted" => {
434 let pages = event
435 .get("pages_fetched")
436 .and_then(|v| v.as_u64())
437 .unwrap_or(0);
438 let jsonld = event
439 .get("jsonld_found")
440 .and_then(|v| v.as_u64())
441 .unwrap_or(0);
442 let patterns = event
443 .get("patterns_used")
444 .and_then(|v| v.as_u64())
445 .unwrap_or(0);
446 let ms = event
447 .get("elapsed_ms")
448 .and_then(|v| v.as_u64())
449 .unwrap_or(0);
450 let pct = if pages > 0 { (jsonld * 100) / pages } else { 0 };
451 eprintln!(
452 " Layer 1 {:<22} {} pages, {} JSON-LD ({}%) {:>15} {}",
453 "Structured Data",
454 pages,
455 jsonld,
456 pct,
457 format!("{:.1}s", ms as f64 / 1000.0),
458 s.ok_sym(),
459 );
460 if patterns > 0 {
461 eprintln!(
462 " Layer 1½ {:<22} {} pages enriched via CSS selectors",
463 "Pattern Engine", patterns,
464 );
465 }
466 }
467 "LayerComplete" => {
468 let layer = event.get("layer").and_then(|v| v.as_u64()).unwrap_or(0);
469 let name = event
470 .get("layer_name")
471 .and_then(|v| v.as_str())
472 .unwrap_or("?");
473 let ms = event
474 .get("elapsed_ms")
475 .and_then(|v| v.as_u64())
476 .unwrap_or(0);
477 eprintln!(
478 " Layer {} {:<22} {:>36} {}",
479 layer,
480 name,
481 format!("{:.1}s", ms as f64 / 1000.0),
482 s.ok_sym(),
483 );
484 }
485 "MapComplete" | "MapFailed" => {
486 break;
488 }
489 _ => {} }
491 }
492}