Skip to main content

hematite/tools/
workspace_workflow.rs

1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use serde_json::{Map, Value};
4use std::fs::{self, OpenOptions};
5use std::io::{Read, Seek, SeekFrom};
6use std::path::{Path, PathBuf};
7use std::process::Stdio;
8use std::time::{Duration, SystemTime, UNIX_EPOCH};
9
/// Default timeout (ms) for workflows that have no more specific default (10 minutes).
const DEFAULT_WORKFLOW_TIMEOUT_MS: u64 = 600_000;
/// Default timeout (ms) for the `build`, `test`, `lint` and `fix` workflows (30 minutes).
const DEFAULT_VERIFY_TIMEOUT_MS: u64 = 1_800_000;
/// Default time (ms) a started website server is given to become ready (2 minutes).
const DEFAULT_WEBSITE_BOOT_TIMEOUT_MS: u64 = 120_000;
/// Default per-request timeout (ms) for website probe and status requests (5 seconds).
const DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS: u64 = 5_000;
/// Default request timeout (ms) used by the website validation workflow (30 seconds).
const DEFAULT_WEBSITE_VALIDATE_TIMEOUT_MS: u64 = 30_000;
/// Byte budget for website log tails.
/// NOTE(review): the code that consumes this value is not visible in this part of the file.
const WEBSITE_LOG_TAIL_BYTES: u64 = 4_096;
16
17pub async fn run_workspace_workflow(args: &Value) -> Result<String, String> {
18    let root = require_project_workspace_root()?;
19    let workflow = required_string(args, "workflow")?;
20
21    match workflow {
22        "website_start" => start_website_server(args, &root).await,
23        "website_probe" => probe_website_server(args, &root).await,
24        "website_validate" => validate_website_server(args, &root).await,
25        "website_status" => website_server_status(args, &root).await,
26        "website_stop" => stop_website_server(args, &root).await,
27        _ => {
28            let invocation = WorkspaceInvocation::from_args(args, &root)?;
29            let output = crate::tools::shell::execute_command_in_dir(
30                &invocation.command,
31                &root,
32                invocation.timeout_ms,
33                false,
34            )
35            .await?;
36
37            Ok(format!(
38                "Workspace workflow: {}\nWorkspace root: {}\nCommand: {}\n\n{}",
39                invocation.workflow_label,
40                root.display(),
41                invocation.command,
42                output.trim()
43            ))
44        }
45    }
46}
47
/// A non-website workflow resolved into the shell command that implements it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct WorkspaceInvocation {
    /// The workflow name exactly as given in the `workflow` argument.
    workflow_label: String,
    /// The shell command line derived for that workflow.
    command: String,
    /// Timeout in milliseconds passed to the command executor.
    timeout_ms: u64,
}
54
/// Everything needed to launch a website server and wait for it to become ready.
#[derive(Debug, Clone, PartialEq, Eq)]
struct WebsiteLaunchPlan {
    /// Lower-cased launch mode taken from the `mode` argument (defaults to `dev`).
    mode: String,
    /// Name of the package.json script that starts the site.
    script: String,
    /// Package-manager command line that runs `script`.
    command: String,
    /// URL that is probed to decide whether the server is ready.
    url: String,
    /// Framework name inferred from package.json (for example `vite` or `next`).
    framework_hint: String,
    /// How long (ms) the server may take to become ready.
    boot_timeout_ms: u64,
    /// Timeout (ms) applied to each individual readiness request.
    request_timeout_ms: u64,
}
65
/// Serializable record of a tracked website server; it is written once a started server
/// has become ready and is read back by the other `website_*` workflows.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct WebsiteServerState {
    /// Label the server was started under (`default` when none was given).
    label: String,
    /// Launch mode copied from the launch plan.
    mode: String,
    /// package.json script name copied from the launch plan.
    script: String,
    /// Command line that was used to start the server.
    command: String,
    /// URL the server is expected to answer on.
    url: String,
    /// Framework name inferred from package.json.
    framework_hint: String,
    /// Process id of the spawned server process.
    pid: u32,
    /// Display form of the path of the log file receiving the server's stdout and stderr.
    log_path: String,
    /// Display form of the workspace root the server was started in.
    workspace_root: String,
    /// Start time in milliseconds since the UNIX epoch.
    started_at_epoch_ms: u64,
}
79
/// Summary of a single HTTP probe against a website URL.
/// NOTE(review): the code that fills this struct is not visible in this part of the file.
#[derive(Debug, Clone, PartialEq, Eq)]
struct WebsiteProbeSummary {
    /// URL reported for the probe.
    url: String,
    /// Status value reported to the user as `HTTP <status>`.
    status: u16,
    /// Content type of the response, if known; validation checks it for `text/html`.
    content_type: Option<String>,
    /// Page title, if one was found; validation flags HTML responses without one.
    title: Option<String>,
    /// Preview of the response body — presumably truncated; confirm against the producer.
    body_preview: String,
}
88
/// A probe summary together with the response body it was derived from.
#[derive(Debug, Clone, PartialEq, Eq)]
struct WebsiteResponseSnapshot {
    /// Summary of the response (status, content type, title, ...).
    summary: WebsiteProbeSummary,
    /// Response body; validation scans it for local asset URLs.
    body: String,
}
94
95impl WorkspaceInvocation {
96    fn from_args(args: &Value, root: &Path) -> Result<Self, String> {
97        let workflow = required_string(args, "workflow")?;
98        let timeout_ms = args
99            .get("timeout_ms")
100            .and_then(|value| value.as_u64())
101            .unwrap_or(default_timeout_ms(workflow));
102
103        let command = match workflow {
104            "build" => default_command_for_action(root, "build")?,
105            "test" => default_command_for_action(root, "test")?,
106            "lint" => default_command_for_action(root, "lint")?,
107            "fix" => default_command_for_action(root, "fix")?,
108            "package_script" => build_package_script_command(root, required_string(args, "name")?)?,
109            "task" => format!("task {}", required_string(args, "name")?),
110            "just" => format!("just {}", required_string(args, "name")?),
111            "make" => format!("make {}", required_string(args, "name")?),
112            "script_path" => build_script_path_command(root, required_string(args, "path")?)?,
113            "command" => required_string(args, "command")?.to_string(),
114            other => {
115                return Err(format!(
116                    "Unknown workflow '{}'. Use one of: build, test, lint, fix, package_script, task, just, make, script_path, command, website_start, website_probe, website_validate, website_status, website_stop.",
117                    other
118                ))
119            }
120        };
121
122        Ok(Self {
123            workflow_label: workflow.to_string(),
124            command,
125            timeout_ms,
126        })
127    }
128}
129
130fn require_project_workspace_root() -> Result<PathBuf, String> {
131    Ok(crate::tools::file_ops::workspace_root())
132}
133
134fn required_string<'a>(args: &'a Value, key: &str) -> Result<&'a str, String> {
135    args.get(key)
136        .and_then(|value| value.as_str())
137        .map(str::trim)
138        .filter(|value| !value.is_empty())
139        .ok_or_else(|| format!("Missing required argument: '{}'", key))
140}
141
142fn optional_string<'a>(args: &'a Value, key: &str) -> Option<&'a str> {
143    args.get(key)
144        .and_then(|value| value.as_str())
145        .map(str::trim)
146        .filter(|value| !value.is_empty())
147}
148
149fn optional_string_vec(args: &Value, key: &str) -> Vec<String> {
150    args.get(key)
151        .and_then(|value| value.as_array())
152        .into_iter()
153        .flat_map(|items| items.iter())
154        .filter_map(|value| value.as_str())
155        .map(str::trim)
156        .filter(|value| !value.is_empty())
157        .map(ToOwned::to_owned)
158        .collect()
159}
160
161fn default_timeout_ms(workflow: &str) -> u64 {
162    match workflow {
163        "build" | "test" | "lint" | "fix" => DEFAULT_VERIFY_TIMEOUT_MS,
164        "website_start" => DEFAULT_WEBSITE_BOOT_TIMEOUT_MS,
165        "website_probe" | "website_status" => DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS,
166        "website_validate" => DEFAULT_WEBSITE_VALIDATE_TIMEOUT_MS,
167        _ => DEFAULT_WORKFLOW_TIMEOUT_MS,
168    }
169}
170
171fn default_command_for_action(root: &Path, action: &str) -> Result<String, String> {
172    let profile = crate::agent::workspace_profile::load_workspace_profile(root)
173        .unwrap_or_else(|| crate::agent::workspace_profile::detect_workspace_profile(root));
174
175    match action {
176        "build" => profile
177            .build_hint
178            .ok_or_else(|| missing_workspace_command_message(action, root)),
179        "test" => profile
180            .test_hint
181            .ok_or_else(|| missing_workspace_command_message(action, root)),
182        "lint" => detect_lint_command(root),
183        "fix" => detect_fix_command(root),
184        other => Err(format!("Unsupported workspace action '{}'.", other)),
185    }
186}
187
/// Builds the user-facing message shown when no command can be inferred for `action` in the
/// workspace at `root`.
fn missing_workspace_command_message(action: &str, root: &Path) -> String {
    let workspace = root.display();
    format!(
        "Hematite could not infer a `{action}` command for the locked workspace at {workspace}. Add a workspace verify profile in `.hematite/settings.json`, or ask for an explicit command/script instead."
    )
}
195
196fn detect_lint_command(root: &Path) -> Result<String, String> {
197    if root.join("Cargo.toml").exists() {
198        Ok("cargo clippy --all-targets --all-features -- -D warnings".to_string())
199    } else if root.join("package.json").exists() {
200        Ok(format!(
201            "{} run lint --if-present",
202            detect_node_package_manager(root)
203        ))
204    } else if root.join("go.mod").exists() {
205        Err(missing_workspace_command_message("lint", root))
206    } else {
207        Err(missing_workspace_command_message("lint", root))
208    }
209}
210
211fn detect_fix_command(root: &Path) -> Result<String, String> {
212    if root.join("Cargo.toml").exists() {
213        Ok("cargo fmt".to_string())
214    } else if root.join("package.json").exists() {
215        Ok(format!(
216            "{} run fix --if-present",
217            detect_node_package_manager(root)
218        ))
219    } else {
220        Err(missing_workspace_command_message("fix", root))
221    }
222}
223
224fn build_package_script_command(root: &Path, name: &str) -> Result<String, String> {
225    let package = read_package_json(root)?;
226    let has_script = package
227        .get("scripts")
228        .and_then(|value| value.get(name))
229        .is_some();
230    if !has_script {
231        return Err(format!(
232            "package.json does not define a script named `{}` in {}.",
233            name,
234            root.display()
235        ));
236    }
237
238    let package_manager = detect_node_package_manager(root);
239    let command = match package_manager.as_str() {
240        "yarn" => format!("yarn {}", name),
241        "bun" => format!("bun run {}", name),
242        manager => format!("{} run {}", manager, name),
243    };
244    Ok(command)
245}
246
247fn read_package_json(root: &Path) -> Result<Value, String> {
248    let package_json = root.join("package.json");
249    if !package_json.exists() {
250        return Err(format!(
251            "This workflow requires package.json in the locked workspace root ({}).",
252            root.display()
253        ));
254    }
255
256    let content = fs::read_to_string(&package_json)
257        .map_err(|e| format!("Failed to read {}: {}", package_json.display(), e))?;
258    serde_json::from_str(&content)
259        .map_err(|e| format!("Failed to parse {}: {}", package_json.display(), e))
260}
261
262fn package_scripts(package: &Value) -> Map<String, Value> {
263    package
264        .get("scripts")
265        .and_then(|value| value.as_object())
266        .cloned()
267        .unwrap_or_default()
268}
269
270fn build_script_path_command(root: &Path, relative_path: &str) -> Result<String, String> {
271    let candidate = root.join(relative_path);
272    let canonical_root = root
273        .canonicalize()
274        .map_err(|e| format!("Failed to resolve workspace root {}: {}", root.display(), e))?;
275    let canonical_path = candidate.canonicalize().map_err(|e| {
276        format!(
277            "Could not resolve script path `{}` from workspace root {}: {}",
278            relative_path,
279            root.display(),
280            e
281        )
282    })?;
283    if !canonical_path.starts_with(&canonical_root) {
284        return Err(format!(
285            "Script path `{}` resolves outside the locked workspace root {}.",
286            relative_path,
287            root.display()
288        ));
289    }
290
291    let display_path = normalize_relative_path(&canonical_path, &canonical_root)?;
292    let lower = display_path.to_ascii_lowercase();
293    if lower.ends_with(".ps1") {
294        Ok(format!(
295            "pwsh -ExecutionPolicy Bypass -File {}",
296            quote_command_arg(&display_path)
297        ))
298    } else if lower.ends_with(".cmd") || lower.ends_with(".bat") {
299        Ok(format!("cmd /C {}", quote_command_arg(&display_path)))
300    } else if lower.ends_with(".sh") {
301        Ok(format!("bash {}", quote_command_arg(&display_path)))
302    } else if lower.ends_with(".py") {
303        Ok(format!("python {}", quote_command_arg(&display_path)))
304    } else if lower.ends_with(".js") || lower.ends_with(".cjs") || lower.ends_with(".mjs") {
305        Ok(format!("node {}", quote_command_arg(&display_path)))
306    } else {
307        Ok(display_path)
308    }
309}
310
/// Renders `path` relative to `root` with a leading `.` and the platform path separator
/// (for example `./scripts/run.sh` on Unix).
///
/// # Errors
/// Returns an error string when `path` does not start with `root`.
fn normalize_relative_path(path: &Path, root: &Path) -> Result<String, String> {
    match path.strip_prefix(root) {
        Ok(relative) => {
            let mut rendered = String::from(".");
            rendered.push(std::path::MAIN_SEPARATOR);
            rendered.push_str(&relative.display().to_string());
            Ok(rendered)
        }
        Err(e) => Err(format!("Failed to normalize script path: {}", e)),
    }
}
321
/// Wraps `value` in double quotes, prefixing every embedded double quote with a backslash.
fn quote_command_arg(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        if ch == '"' {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
325
/// Detects the Node package manager from the lockfile present in `root`.
///
/// Lockfiles are checked in priority order (pnpm, yarn, bun); `npm` is the fallback when none
/// of them exists.
fn detect_node_package_manager(root: &Path) -> String {
    // (lockfile name, package manager), highest priority first.
    const LOCKFILES: [(&str, &str); 4] = [
        ("pnpm-lock.yaml", "pnpm"),
        ("yarn.lock", "yarn"),
        ("bun.lockb", "bun"),
        ("bun.lock", "bun"),
    ];

    for &(lockfile, manager) in LOCKFILES.iter() {
        if root.join(lockfile).exists() {
            return manager.to_string();
        }
    }
    "npm".to_string()
}
337
338async fn start_website_server(args: &Value, root: &Path) -> Result<String, String> {
339    let label = optional_string(args, "label").unwrap_or("default");
340    let state_path = website_state_path(root, label);
341    if let Some(existing) = load_website_server_state(&state_path)? {
342        if is_process_alive(existing.pid).await {
343            return Err(format!(
344                "A website server labeled `{}` is already running.\nURL: {}\nPID: {}\nLog: {}\nUse workflow=website_status or workflow=website_stop first.",
345                existing.label, existing.url, existing.pid, existing.log_path
346            ));
347        }
348        let _ = fs::remove_file(&state_path);
349    }
350
351    let plan = detect_website_launch_plan(args, root)?;
352    let runtime_dir = website_runtime_dir(root);
353    fs::create_dir_all(&runtime_dir)
354        .map_err(|e| format!("Failed to create {}: {}", runtime_dir.display(), e))?;
355    let log_path = website_log_path(root, label);
356    let stdout_log = OpenOptions::new()
357        .create(true)
358        .truncate(true)
359        .write(true)
360        .open(&log_path)
361        .map_err(|e| format!("Failed to create {}: {}", log_path.display(), e))?;
362    let stderr_log = stdout_log
363        .try_clone()
364        .map_err(|e| format!("Failed to clone log handle {}: {}", log_path.display(), e))?;
365
366    let mut command = build_shell_command(&plan.command).await;
367    command
368        .current_dir(root)
369        .stdout(Stdio::from(stdout_log))
370        .stderr(Stdio::from(stderr_log));
371
372    let sandbox_root = crate::tools::file_ops::hematite_dir().join("sandbox");
373    let _ = fs::create_dir_all(&sandbox_root);
374    command.env("HOME", &sandbox_root);
375    command.env("TMPDIR", &sandbox_root);
376    command.env("CI", "1");
377    command.env("BROWSER", "none");
378
379    let mut child = command
380        .spawn()
381        .map_err(|e| format!("Failed to start website server: {}", e))?;
382    let pid = child
383        .id()
384        .ok_or_else(|| "Website server started without a visible process id.".to_string())?;
385
386    let started_at_epoch_ms = SystemTime::now()
387        .duration_since(UNIX_EPOCH)
388        .map_err(|e| format!("Clock error: {}", e))?
389        .as_millis() as u64;
390
391    let state = WebsiteServerState {
392        label: label.to_string(),
393        mode: plan.mode.clone(),
394        script: plan.script.clone(),
395        command: plan.command.clone(),
396        url: plan.url.clone(),
397        framework_hint: plan.framework_hint.clone(),
398        pid,
399        log_path: log_path.display().to_string(),
400        workspace_root: root.display().to_string(),
401        started_at_epoch_ms,
402    };
403
404    let probe = wait_for_website_readiness(
405        &mut child,
406        &plan.url,
407        plan.boot_timeout_ms,
408        plan.request_timeout_ms,
409        &log_path,
410    )
411    .await
412    .map_err(|message| {
413        let _ = fs::remove_file(&state_path);
414        message
415    })?;
416
417    save_website_server_state(&state_path, &state)?;
418
419    Ok(format!(
420        "Workspace workflow: website_start\nWorkspace root: {}\nMode: {}\nLabel: {}\nScript: {}\nCommand: {}\nFramework hint: {}\nURL: {}\nPID: {}\nLog: {}\n\nReady: HTTP {}{}\n{}",
421        root.display(),
422        state.mode,
423        state.label,
424        state.script,
425        state.command,
426        state.framework_hint,
427        state.url,
428        state.pid,
429        state.log_path,
430        probe.status,
431        probe
432            .title
433            .as_ref()
434            .map(|title| format!(" ({title})"))
435            .unwrap_or_default(),
436        format_probe_details(&probe)
437    ))
438}
439
440fn detect_website_launch_plan(args: &Value, root: &Path) -> Result<WebsiteLaunchPlan, String> {
441    let package = read_package_json(root)?;
442    let scripts = package_scripts(&package);
443    let runtime_contract = load_runtime_contract(root);
444    let mode = optional_string(args, "mode")
445        .unwrap_or("dev")
446        .to_ascii_lowercase();
447    let script = if let Some(explicit) = optional_string(args, "script") {
448        explicit.to_string()
449    } else {
450        detect_website_script_name(&scripts, &mode)?
451    };
452    if !scripts.contains_key(&script) {
453        return Err(format!(
454            "package.json does not define a website script named `{}` in {}.",
455            script,
456            root.display()
457        ));
458    }
459
460    let framework_hint = infer_website_framework(&package);
461    let port = args
462        .get("port")
463        .and_then(|value| value.as_u64())
464        .and_then(|value| u16::try_from(value).ok())
465        .or_else(|| infer_website_default_port(&package, &mode));
466    let host = optional_string(args, "host").unwrap_or("127.0.0.1");
467    let url = if let Some(explicit_url) = optional_string(args, "url") {
468        normalize_http_url(explicit_url)
469    } else if let Some(url_hint) = runtime_contract
470        .as_ref()
471        .and_then(|contract| contract.local_url_hint.clone())
472    {
473        url_hint
474    } else {
475        let inferred_port = port.unwrap_or(if mode == "preview" { 4173 } else { 3000 });
476        format!("http://{}:{}/", host, inferred_port)
477    };
478
479    Ok(WebsiteLaunchPlan {
480        mode,
481        script: script.clone(),
482        command: build_package_script_command(root, &script)?,
483        url,
484        framework_hint,
485        boot_timeout_ms: args
486            .get("timeout_ms")
487            .and_then(|value| value.as_u64())
488            .unwrap_or(DEFAULT_WEBSITE_BOOT_TIMEOUT_MS),
489        request_timeout_ms: args
490            .get("request_timeout_ms")
491            .and_then(|value| value.as_u64())
492            .unwrap_or(DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS),
493    })
494}
495
496fn detect_website_script_name(scripts: &Map<String, Value>, mode: &str) -> Result<String, String> {
497    let candidates = match mode {
498        "dev" => ["dev", "start", "serve"],
499        "preview" => ["preview", "serve", "start"],
500        "start" => ["start", "serve", "dev"],
501        other => {
502            return Err(format!(
503                "Unknown website mode `{}`. Use one of: dev, preview, start.",
504                other
505            ))
506        }
507    };
508
509    candidates
510        .iter()
511        .find(|candidate| scripts.contains_key(**candidate))
512        .map(|candidate| candidate.to_string())
513        .ok_or_else(|| {
514            format!(
515                "Could not infer a website {} script from package.json. Define one of [{}], or pass `script` explicitly.",
516                mode,
517                candidates.join(", ")
518            )
519        })
520}
521
522fn infer_website_framework(package: &Value) -> String {
523    let deps = dependency_names(package);
524    let script_text = package_scripts(package)
525        .into_values()
526        .filter_map(|value| value.as_str().map(|text| text.to_ascii_lowercase()))
527        .collect::<Vec<_>>()
528        .join("\n");
529
530    if deps.contains("next") || script_text.contains("next ") {
531        "next".to_string()
532    } else if deps.contains("vite") || script_text.contains("vite") {
533        "vite".to_string()
534    } else if deps.contains("astro") || script_text.contains("astro ") {
535        "astro".to_string()
536    } else if deps.contains("@angular/core") || script_text.contains("ng serve") {
537        "angular".to_string()
538    } else if deps.contains("gatsby") || script_text.contains("gatsby ") {
539        "gatsby".to_string()
540    } else if deps.contains("react-scripts") || script_text.contains("react-scripts") {
541        "react-scripts".to_string()
542    } else if deps.contains("@sveltejs/kit") || script_text.contains("svelte-kit") {
543        "sveltekit".to_string()
544    } else if deps.contains("nuxt") || script_text.contains("nuxt ") {
545        "nuxt".to_string()
546    } else {
547        "generic-node-site".to_string()
548    }
549}
550
551fn infer_website_default_port(package: &Value, mode: &str) -> Option<u16> {
552    match infer_website_framework(package).as_str() {
553        "vite" | "sveltekit" => Some(if mode == "preview" { 4173 } else { 5173 }),
554        "astro" => Some(4321),
555        "gatsby" => Some(8000),
556        "angular" => Some(4200),
557        "next" | "react-scripts" | "nuxt" => Some(3000),
558        _ => None,
559    }
560}
561
562fn dependency_names(package: &Value) -> std::collections::BTreeSet<String> {
563    let mut deps = std::collections::BTreeSet::new();
564    for field in ["dependencies", "devDependencies", "peerDependencies"] {
565        if let Some(map) = package.get(field).and_then(|value| value.as_object()) {
566            for name in map.keys() {
567                deps.insert(name.to_ascii_lowercase());
568            }
569        }
570    }
571    deps
572}
573
574async fn wait_for_website_readiness(
575    child: &mut tokio::process::Child,
576    url: &str,
577    boot_timeout_ms: u64,
578    request_timeout_ms: u64,
579    log_path: &Path,
580) -> Result<WebsiteProbeSummary, String> {
581    let deadline = tokio::time::Instant::now() + Duration::from_millis(boot_timeout_ms);
582    let client = reqwest::Client::builder()
583        .timeout(Duration::from_millis(request_timeout_ms))
584        .redirect(reqwest::redirect::Policy::limited(5))
585        .build()
586        .map_err(|e| format!("Failed to build readiness probe client: {}", e))?;
587
588    loop {
589        let probe_error = match probe_website_once(&client, url).await {
590            Ok(summary) => return Ok(summary),
591            Err(err) => err,
592        };
593
594        match child.try_wait() {
595            Ok(Some(status)) => {
596                return Err(format!(
597                    "Website server exited before it became ready (status: {}).\nLast probe error: {}\n{}",
598                    status,
599                    probe_error,
600                    format_log_tail_for_path("Recent log tail", Some(log_path))
601                ));
602            }
603            Ok(None) => {}
604            Err(err) => {
605                return Err(format!("Failed to inspect website server status: {}", err));
606            }
607        }
608
609        if tokio::time::Instant::now() >= deadline {
610            let _ = child.kill().await;
611            return Err(format!(
612                "Website server did not become ready within {} ms.\nLast probe error: {}\n{}",
613                boot_timeout_ms,
614                probe_error,
615                format_log_tail_for_path("Recent log tail", Some(log_path))
616            ));
617        }
618
619        tokio::time::sleep(Duration::from_millis(750)).await;
620    }
621}
622
623async fn probe_website_server(args: &Value, root: &Path) -> Result<String, String> {
624    let label = optional_string(args, "label").unwrap_or("default");
625    let state = load_website_server_state(&website_state_path(root, label))?;
626    let (url, log_path) = if let Some(state) = state {
627        (state.url, Some(state.log_path))
628    } else if let Some(url) = optional_string(args, "url") {
629        (normalize_http_url(url), None)
630    } else {
631        return Err(format!(
632            "No tracked website server labeled `{}`. Pass `url` to probe an arbitrary local site, or start one with workflow=website_start.",
633            label
634        ));
635    };
636
637    let request_timeout_ms = args
638        .get("timeout_ms")
639        .and_then(|value| value.as_u64())
640        .unwrap_or(DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS);
641    let client = reqwest::Client::builder()
642        .timeout(Duration::from_millis(request_timeout_ms))
643        .redirect(reqwest::redirect::Policy::limited(5))
644        .build()
645        .map_err(|e| format!("Failed to build probe client: {}", e))?;
646    let probe = probe_website_once(&client, &url).await.map_err(|e| {
647        if let Some(path) = log_path.as_deref() {
648            format!("{}\n{}", e, format_log_tail("Recent log tail", Some(path)))
649        } else {
650            e
651        }
652    })?;
653
654    Ok(format!(
655        "Workspace workflow: website_probe\nWorkspace root: {}\nURL: {}\n\nHTTP {}{}\n{}",
656        root.display(),
657        probe.url,
658        probe.status,
659        probe
660            .title
661            .as_ref()
662            .map(|title| format!(" ({title})"))
663            .unwrap_or_default(),
664        format_probe_details(&probe)
665    ))
666}
667
668async fn validate_website_server(args: &Value, root: &Path) -> Result<String, String> {
669    let label = optional_string(args, "label").unwrap_or("default");
670    let (base_url, log_path) = resolve_website_target(args, root, label)?;
671    let routes = default_website_routes(args, root);
672    let asset_limit = args
673        .get("asset_limit")
674        .and_then(|value| value.as_u64())
675        .unwrap_or(8)
676        .min(24) as usize;
677    let request_timeout_ms = args
678        .get("timeout_ms")
679        .and_then(|value| value.as_u64())
680        .unwrap_or(DEFAULT_WEBSITE_VALIDATE_TIMEOUT_MS);
681    let client = reqwest::Client::builder()
682        .timeout(Duration::from_millis(request_timeout_ms))
683        .redirect(reqwest::redirect::Policy::limited(5))
684        .build()
685        .map_err(|e| format!("Failed to build validation client: {}", e))?;
686
687    let mut route_lines = Vec::new();
688    let mut asset_lines = Vec::new();
689    let mut issues = Vec::new();
690    let mut assets = std::collections::BTreeSet::new();
691
692    for route in &routes {
693        let route_url = resolve_website_url(&base_url, route)?;
694        match fetch_website_snapshot(&client, &route_url).await {
695            Ok(snapshot) => {
696                let summary = &snapshot.summary;
697                route_lines.push(format!(
698                    "- {} -> HTTP {}{}",
699                    route,
700                    summary.status,
701                    summary
702                        .title
703                        .as_ref()
704                        .map(|title| format!(" ({title})"))
705                        .unwrap_or_default()
706                ));
707                let content_type = summary.content_type.as_deref().unwrap_or_default();
708                if content_type.contains("text/html") {
709                    if summary.title.is_none() {
710                        issues.push(format!("Route {} returned HTML without a <title>.", route));
711                    }
712                    for asset in extract_local_asset_urls(&route_url, &snapshot.body)
713                        .into_iter()
714                        .take(asset_limit)
715                    {
716                        assets.insert(asset);
717                    }
718                }
719            }
720            Err(err) => {
721                issues.push(format!("Route {} failed validation: {}", route, err));
722            }
723        }
724    }
725
726    for asset_url in assets.iter().take(asset_limit) {
727        match probe_website_once(&client, asset_url).await {
728            Ok(summary) => asset_lines.push(format!(
729                "- {} -> HTTP {} ({})",
730                asset_url,
731                summary.status,
732                summary
733                    .content_type
734                    .as_deref()
735                    .unwrap_or("unknown content type")
736            )),
737            Err(err) => issues.push(format!("Asset {} failed validation: {}", asset_url, err)),
738        }
739    }
740
741    let result = if issues.is_empty() { "PASS" } else { "FAIL" };
742    let mut out = format!(
743        "Workspace workflow: website_validate\nWorkspace root: {}\nBase URL: {}\nRoutes checked: {}\nAssets checked: {}\nResult: {}",
744        root.display(),
745        base_url,
746        routes.len(),
747        asset_lines.len(),
748        result
749    );
750    if !route_lines.is_empty() {
751        out.push_str("\n\nRoutes\n");
752        out.push_str(&route_lines.join("\n"));
753    }
754    if !asset_lines.is_empty() {
755        out.push_str("\n\nAssets\n");
756        out.push_str(&asset_lines.join("\n"));
757    }
758    if !issues.is_empty() {
759        out.push_str("\n\nIssues\n");
760        out.push_str(
761            &issues
762                .into_iter()
763                .map(|issue| format!("- {}", issue))
764                .collect::<Vec<_>>()
765                .join("\n"),
766        );
767    }
768    if let Some(path) = log_path.as_deref() {
769        out.push_str("\n\n");
770        out.push_str(&format_log_tail("Recent log tail", Some(path)));
771    }
772    Ok(out)
773}
774
775fn resolve_website_target(
776    args: &Value,
777    root: &Path,
778    label: &str,
779) -> Result<(String, Option<String>), String> {
780    let state = load_website_server_state(&website_state_path(root, label))?;
781    if let Some(state) = state {
782        return Ok((state.url, Some(state.log_path)));
783    }
784    if let Some(url) = optional_string(args, "url") {
785        return Ok((normalize_http_url(url), None));
786    }
787    if let Some(url_hint) = load_runtime_contract(root).and_then(|contract| contract.local_url_hint)
788    {
789        return Ok((url_hint, None));
790    }
791    Err(format!(
792        "No tracked website server labeled `{}` and no explicit url. Start the site with workflow=website_start or pass `url`.",
793        label
794    ))
795}
796
797fn default_website_routes(args: &Value, root: &Path) -> Vec<String> {
798    let mut routes = optional_string_vec(args, "routes");
799    if !routes.is_empty() {
800        return normalize_route_hints(routes);
801    }
802    if let Some(contract) = load_runtime_contract(root) {
803        routes = contract.route_hints;
804    }
805    if routes.is_empty() {
806        routes.push("/".to_string());
807    }
808    normalize_route_hints(routes)
809}
810
/// Normalizes route hints into a sorted, de-duplicated list.
///
/// Each hint is trimmed and blank hints are dropped. Absolute `http://` / `https://` URLs
/// and hints that already start with `/` are kept as they are; anything else gets a leading
/// `/`. When nothing remains, the single route `/` is returned.
fn normalize_route_hints(routes: Vec<String>) -> Vec<String> {
    let mut normalized = std::collections::BTreeSet::new();
    for route in routes {
        let trimmed = route.trim();
        if trimmed.is_empty() {
            continue;
        }
        let is_absolute_url = trimmed.starts_with("http://") || trimmed.starts_with("https://");
        // Absolute URLs and rooted paths share one branch; they used to be two identical ones.
        if is_absolute_url || trimmed.starts_with('/') {
            normalized.insert(trimmed.to_string());
        } else {
            normalized.insert(format!("/{}", trimmed));
        }
    }
    if normalized.is_empty() {
        normalized.insert("/".to_string());
    }
    normalized.into_iter().collect()
}
831
832fn resolve_website_url(base_url: &str, route: &str) -> Result<String, String> {
833    if route.starts_with("http://") || route.starts_with("https://") {
834        return Ok(route.to_string());
835    }
836    let base = reqwest::Url::parse(base_url)
837        .map_err(|e| format!("Invalid base URL {}: {}", base_url, e))?;
838    base.join(route).map(|url| url.to_string()).map_err(|e| {
839        format!(
840            "Failed to resolve route {} against {}: {}",
841            route, base_url, e
842        )
843    })
844}
845
846async fn website_server_status(args: &Value, root: &Path) -> Result<String, String> {
847    let label = optional_string(args, "label").unwrap_or("default");
848    let state_path = website_state_path(root, label);
849    let Some(state) = load_website_server_state(&state_path)? else {
850        return Err(format!(
851            "No tracked website server labeled `{}`. Start one with workflow=website_start.",
852            label
853        ));
854    };
855
856    let alive = is_process_alive(state.pid).await;
857    let request_timeout_ms = args
858        .get("timeout_ms")
859        .and_then(|value| value.as_u64())
860        .unwrap_or(DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS);
861    let client = reqwest::Client::builder()
862        .timeout(Duration::from_millis(request_timeout_ms))
863        .redirect(reqwest::redirect::Policy::limited(5))
864        .build()
865        .map_err(|e| format!("Failed to build status probe client: {}", e))?;
866    let probe = probe_website_once(&client, &state.url).await.ok();
867
868    let mut out = format!(
869        "Workspace workflow: website_status\nWorkspace root: {}\nLabel: {}\nMode: {}\nScript: {}\nCommand: {}\nFramework hint: {}\nURL: {}\nPID: {}\nAlive: {}\nLog: {}",
870        root.display(),
871        state.label,
872        state.mode,
873        state.script,
874        state.command,
875        state.framework_hint,
876        state.url,
877        state.pid,
878        if alive { "yes" } else { "no" },
879        state.log_path
880    );
881    if let Some(probe) = probe {
882        out.push_str(&format!(
883            "\n\nHTTP {}{}\n{}",
884            probe.status,
885            probe
886                .title
887                .as_ref()
888                .map(|title| format!(" ({title})"))
889                .unwrap_or_default(),
890            format_probe_details(&probe)
891        ));
892    } else {
893        out.push_str("\n\nHTTP probe: unavailable");
894    }
895    out.push_str("\n");
896    out.push_str(&format_log_tail("Recent log tail", Some(&state.log_path)));
897    Ok(out)
898}
899
900async fn stop_website_server(args: &Value, root: &Path) -> Result<String, String> {
901    let label = optional_string(args, "label").unwrap_or("default");
902    let state_path = website_state_path(root, label);
903    let Some(state) = load_website_server_state(&state_path)? else {
904        return Err(format!(
905            "No tracked website server labeled `{}`. Nothing to stop.",
906            label
907        ));
908    };
909
910    let was_alive = is_process_alive(state.pid).await;
911    if was_alive {
912        kill_process(state.pid).await?;
913    }
914    let _ = fs::remove_file(&state_path);
915
916    Ok(format!(
917        "Workspace workflow: website_stop\nWorkspace root: {}\nLabel: {}\nPID: {}\nWas alive: {}\nURL: {}\nLog: {}\n\n{}",
918        root.display(),
919        state.label,
920        state.pid,
921        if was_alive { "yes" } else { "no" },
922        state.url,
923        state.log_path,
924        format_log_tail("Recent log tail", Some(&state.log_path))
925    ))
926}
927
928async fn probe_website_once(
929    client: &reqwest::Client,
930    url: &str,
931) -> Result<WebsiteProbeSummary, String> {
932    Ok(fetch_website_snapshot(client, url).await?.summary)
933}
934
935async fn fetch_website_snapshot(
936    client: &reqwest::Client,
937    url: &str,
938) -> Result<WebsiteResponseSnapshot, String> {
939    let response = client
940        .get(url)
941        .send()
942        .await
943        .map_err(|e| format!("HTTP probe failed for {}: {}", url, e))?;
944    let status = response.status();
945    let content_type = response
946        .headers()
947        .get(reqwest::header::CONTENT_TYPE)
948        .and_then(|value| value.to_str().ok())
949        .map(|value| value.to_string());
950    let body = response
951        .text()
952        .await
953        .map_err(|e| format!("Failed to read response body from {}: {}", url, e))?;
954    if !status.is_success() {
955        return Err(format!(
956            "HTTP probe returned {} for {}.",
957            status.as_u16(),
958            url
959        ));
960    }
961
962    Ok(WebsiteResponseSnapshot {
963        summary: WebsiteProbeSummary {
964            url: url.to_string(),
965            status: status.as_u16(),
966            content_type,
967            title: extract_html_title(&body),
968            body_preview: html_preview_text(&body),
969        },
970        body,
971    })
972}
973
974fn extract_html_title(body: &str) -> Option<String> {
975    let re = Regex::new(r"(?is)<title[^>]*>(.*?)</title>").ok()?;
976    re.captures(body)
977        .and_then(|captures| captures.get(1).map(|value| value.as_str()))
978        .map(compact_whitespace)
979        .filter(|title| !title.is_empty())
980}
981
982fn html_preview_text(body: &str) -> String {
983    let strip_re = Regex::new(r"(?is)<script[^>]*>.*?</script>|<style[^>]*>.*?</style>|<[^>]+>")
984        .expect("valid strip regex");
985    let stripped = strip_re.replace_all(body, " ");
986    let compact = compact_whitespace(&stripped);
987    compact.chars().take(240).collect()
988}
989
/// Collapses every run of whitespace in `input` into a single space and drops
/// leading and trailing whitespace.
fn compact_whitespace(input: &str) -> String {
    let mut compact = String::with_capacity(input.len());
    for word in input.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first one.
        if !compact.is_empty() {
            compact.push(' ');
        }
        compact.push_str(word);
    }
    compact
}
993
994fn format_probe_details(probe: &WebsiteProbeSummary) -> String {
995    let mut lines = Vec::new();
996    if let Some(content_type) = probe.content_type.as_deref() {
997        lines.push(format!("Content-Type: {}", content_type));
998    }
999    if let Some(title) = probe.title.as_deref() {
1000        lines.push(format!("Title: {}", title));
1001    }
1002    if !probe.body_preview.is_empty() {
1003        lines.push(format!("Body preview: {}", probe.body_preview));
1004    }
1005    if lines.is_empty() {
1006        "(no probe details)".to_string()
1007    } else {
1008        lines.join("\n")
1009    }
1010}
1011
/// Ensures `url` carries an HTTP(S) scheme.
///
/// The input is trimmed. If it already starts with `http://` or `https://` it
/// is returned as is; otherwise `http://` is prepended. URL schemes are
/// case-insensitive, so the check ignores ASCII case: `HTTP://host` is left
/// alone instead of becoming `http://HTTP://host`.
fn normalize_http_url(url: &str) -> String {
    let trimmed = url.trim();
    // `str::get` yields `None` for inputs shorter than the scheme or when the
    // cut would land inside a multi-byte character, so this cannot panic.
    let has_http_scheme = ["http://", "https://"].iter().any(|scheme| {
        trimmed
            .get(..scheme.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(scheme))
    });
    if has_http_scheme {
        trimmed.to_string()
    } else {
        format!("http://{}", trimmed)
    }
}
1020
1021fn extract_local_asset_urls(page_url: &str, body: &str) -> Vec<String> {
1022    let Ok(page) = reqwest::Url::parse(page_url) else {
1023        return Vec::new();
1024    };
1025    let regex = Regex::new(r#"(?is)(?:src|href)=["']([^"'#]+)["']"#).expect("valid asset regex");
1026    let mut assets = std::collections::BTreeSet::new();
1027    for captures in regex.captures_iter(body) {
1028        let Some(raw) = captures.get(1).map(|value| value.as_str().trim()) else {
1029            continue;
1030        };
1031        let lower = raw.to_ascii_lowercase();
1032        if lower.starts_with("http://")
1033            || lower.starts_with("https://")
1034            || lower.starts_with("data:")
1035            || lower.starts_with("mailto:")
1036            || lower.starts_with("tel:")
1037            || lower.starts_with("javascript:")
1038        {
1039            continue;
1040        }
1041        if !looks_like_static_asset(raw) {
1042            continue;
1043        }
1044        if let Ok(joined) = page.join(raw) {
1045            assets.insert(joined.to_string());
1046        }
1047    }
1048    assets.into_iter().collect()
1049}
1050
/// Heuristically decides whether `path` refers to a static asset.
///
/// The check is case-insensitive and succeeds when any known asset extension
/// occurs anywhere in the string. It is a substring match, not a strict suffix
/// match, so `app.js?v=3` is accepted, and so is `page.jsp`.
fn looks_like_static_asset(path: &str) -> bool {
    const ASSET_MARKERS: &[&str] = &[
        ".css",
        ".js",
        ".mjs",
        ".ico",
        ".png",
        ".jpg",
        ".jpeg",
        ".svg",
        ".webp",
        ".gif",
        ".woff",
        ".woff2",
        ".map",
        ".json",
        ".webmanifest",
    ];

    let lowered = path.to_ascii_lowercase();
    for marker in ASSET_MARKERS {
        if lowered.contains(marker) {
            return true;
        }
    }
    false
}
1073
1074fn load_runtime_contract(root: &Path) -> Option<crate::agent::workspace_profile::RuntimeContract> {
1075    crate::agent::workspace_profile::load_workspace_profile(root)
1076        .unwrap_or_else(|| crate::agent::workspace_profile::detect_workspace_profile(root))
1077        .runtime_contract
1078}
1079
1080fn website_runtime_dir(root: &Path) -> PathBuf {
1081    if crate::tools::file_ops::is_os_shortcut_directory(root) {
1082        crate::tools::file_ops::hematite_dir().join("website-runtime")
1083    } else {
1084        root.join(".hematite").join("website-runtime")
1085    }
1086}
1087
1088fn website_state_path(root: &Path, label: &str) -> PathBuf {
1089    website_runtime_dir(root).join(format!("{}.json", slugify_label(label)))
1090}
1091
1092fn website_log_path(root: &Path, label: &str) -> PathBuf {
1093    website_runtime_dir(root).join(format!("{}.log", slugify_label(label)))
1094}
1095
/// Converts an arbitrary label into a file-name-safe slug.
///
/// ASCII letters and digits are kept (lowercased); every run of other
/// characters becomes a single `-`, and leading or trailing dashes are
/// dropped. A label with no ASCII letters or digits yields `"default"`.
fn slugify_label(input: &str) -> String {
    // Splitting on every non-alphanumeric character and discarding the empty
    // pieces collapses separator runs and trims both ends in one pass.
    let words: Vec<String> = input
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .map(|piece| piece.to_ascii_lowercase())
        .collect();

    if words.is_empty() {
        "default".to_string()
    } else {
        words.join("-")
    }
}
1116
1117fn save_website_server_state(path: &Path, state: &WebsiteServerState) -> Result<(), String> {
1118    if let Some(parent) = path.parent() {
1119        fs::create_dir_all(parent)
1120            .map_err(|e| format!("Failed to create {}: {}", parent.display(), e))?;
1121    }
1122    let payload = serde_json::to_string_pretty(state)
1123        .map_err(|e| format!("Failed to encode website state: {}", e))?;
1124    fs::write(path, payload).map_err(|e| format!("Failed to write {}: {}", path.display(), e))
1125}
1126
1127fn load_website_server_state(path: &Path) -> Result<Option<WebsiteServerState>, String> {
1128    if !path.exists() {
1129        return Ok(None);
1130    }
1131    let raw = fs::read_to_string(path)
1132        .map_err(|e| format!("Failed to read {}: {}", path.display(), e))?;
1133    let state = serde_json::from_str(&raw)
1134        .map_err(|e| format!("Failed to parse {}: {}", path.display(), e))?;
1135    Ok(Some(state))
1136}
1137
1138fn format_log_tail(label: &str, path: Option<&str>) -> String {
1139    match path {
1140        Some(path) => match read_log_tail(Path::new(path)) {
1141            Ok(tail) if tail.is_empty() => format!("{}: (empty)", label),
1142            Ok(tail) => format!("{}:\n{}", label, tail),
1143            Err(err) => format!("{}: unavailable ({})", label, err),
1144        },
1145        None => format!("{}: unavailable", label),
1146    }
1147}
1148
1149fn format_log_tail_for_path(label: &str, path: Option<&Path>) -> String {
1150    match path {
1151        Some(path) => match read_log_tail(path) {
1152            Ok(tail) if tail.is_empty() => format!("{}: (empty)", label),
1153            Ok(tail) => format!("{}:\n{}", label, tail),
1154            Err(err) => format!("{}: unavailable ({})", label, err),
1155        },
1156        None => format!("{}: unavailable", label),
1157    }
1158}
1159
1160fn read_log_tail(path: &Path) -> Result<String, String> {
1161    let mut file =
1162        fs::File::open(path).map_err(|e| format!("failed to open {}: {}", path.display(), e))?;
1163    let len = file
1164        .metadata()
1165        .map_err(|e| format!("failed to inspect {}: {}", path.display(), e))?
1166        .len();
1167    let start = len.saturating_sub(WEBSITE_LOG_TAIL_BYTES);
1168    file.seek(SeekFrom::Start(start))
1169        .map_err(|e| format!("failed to seek {}: {}", path.display(), e))?;
1170    let mut buffer = String::new();
1171    file.read_to_string(&mut buffer)
1172        .map_err(|e| format!("failed to read {}: {}", path.display(), e))?;
1173    Ok(buffer.trim().to_string())
1174}
1175
1176async fn build_shell_command(command: &str) -> tokio::process::Command {
1177    #[cfg(target_os = "windows")]
1178    {
1179        let normalized = command
1180            .replace("/dev/null", "$null")
1181            .replace("1>/dev/null", "2>$null")
1182            .replace("2>/dev/null", "2>$null");
1183
1184        if which("pwsh").await {
1185            let mut cmd = tokio::process::Command::new("pwsh");
1186            cmd.args(["-NoProfile", "-NonInteractive", "-Command", &normalized]);
1187            cmd
1188        } else {
1189            let mut cmd = tokio::process::Command::new("powershell");
1190            cmd.args(["-NoProfile", "-NonInteractive", "-Command", &normalized]);
1191            cmd
1192        }
1193    }
1194    #[cfg(not(target_os = "windows"))]
1195    {
1196        let mut cmd = tokio::process::Command::new("sh");
1197        cmd.args(["-c", command]);
1198        cmd
1199    }
1200}
1201
/// Reports whether `<name>.exe` can be found on the PATH.
///
/// Windows only: runs `where <name>.exe` with its output discarded and returns
/// whether it exited successfully. A failure to launch `where` counts as "not
/// found".
#[cfg(target_os = "windows")]
async fn which(name: &str) -> bool {
    // The whole function is compiled for Windows only, so no per-platform
    // variant of the executable name is needed.
    let executable = format!("{}.exe", name);

    tokio::process::Command::new("where")
        .arg(executable)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .await
        .map(|status| status.success())
        .unwrap_or(false)
}
1218
1219async fn is_process_alive(pid: u32) -> bool {
1220    #[cfg(target_os = "windows")]
1221    {
1222        tokio::process::Command::new("tasklist")
1223            .args(["/FI", &format!("PID eq {}", pid)])
1224            .stdout(Stdio::piped())
1225            .stderr(Stdio::null())
1226            .output()
1227            .await
1228            .ok()
1229            .map(|output| {
1230                let text = String::from_utf8_lossy(&output.stdout);
1231                text.lines().any(|line| {
1232                    line.split_whitespace()
1233                        .any(|token| token == pid.to_string())
1234                })
1235            })
1236            .unwrap_or(false)
1237    }
1238    #[cfg(not(target_os = "windows"))]
1239    {
1240        tokio::process::Command::new("kill")
1241            .args(["-0", &pid.to_string()])
1242            .stdout(Stdio::null())
1243            .stderr(Stdio::null())
1244            .status()
1245            .await
1246            .map(|status| status.success())
1247            .unwrap_or(false)
1248    }
1249}
1250
1251async fn kill_process(pid: u32) -> Result<(), String> {
1252    #[cfg(target_os = "windows")]
1253    {
1254        let output = tokio::process::Command::new("taskkill")
1255            .args(["/PID", &pid.to_string(), "/T", "/F"])
1256            .output()
1257            .await
1258            .map_err(|e| format!("Failed to stop PID {}: {}", pid, e))?;
1259        if output.status.success() {
1260            Ok(())
1261        } else {
1262            Err(format!(
1263                "Failed to stop PID {}: {}",
1264                pid,
1265                String::from_utf8_lossy(&output.stderr).trim()
1266            ))
1267        }
1268    }
1269    #[cfg(not(target_os = "windows"))]
1270    {
1271        let status = tokio::process::Command::new("kill")
1272            .args(["-TERM", &pid.to_string()])
1273            .status()
1274            .await
1275            .map_err(|e| format!("Failed to stop PID {}: {}", pid, e))?;
1276        if status.success() {
1277            Ok(())
1278        } else {
1279            Err(format!("Failed to stop PID {}.", pid))
1280        }
1281    }
1282}
1283
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `json` as the `package.json` of the fixture workspace at `root`.
    fn write_package(root: &Path, json: &str) {
        fs::write(root.join("package.json"), json).unwrap();
    }

    /// Creates a uniquely named scratch workspace whose name starts with
    /// `prefix`. The directory is removed when the returned guard is dropped,
    /// including when a test assertion panics.
    fn scratch_workspace(prefix: &str) -> tempfile::TempDir {
        tempfile::Builder::new().prefix(prefix).tempdir().unwrap()
    }

    #[test]
    fn package_script_uses_detected_package_manager() {
        let dir = scratch_workspace("hematite-workspace-workflow-node-");
        let package_root = dir.path().to_path_buf();
        write_package(&package_root, r#"{ "scripts": { "dev": "vite" } }"#);
        fs::write(package_root.join("pnpm-lock.yaml"), "").unwrap();

        let command = build_package_script_command(&package_root, "dev").unwrap();
        assert_eq!(command, "pnpm run dev");
    }

    #[test]
    fn script_path_stays_inside_workspace_root() {
        let dir = scratch_workspace("hematite-workspace-workflow-scripts-");
        let script_dir = dir.path().to_path_buf();
        fs::create_dir_all(script_dir.join("scripts")).unwrap();
        fs::write(script_dir.join("scripts").join("dev.ps1"), "Write-Host hi").unwrap();

        let command = build_script_path_command(&script_dir, "scripts/dev.ps1").unwrap();
        assert!(command.contains("pwsh -ExecutionPolicy Bypass -File"));
    }

    #[test]
    fn detect_website_launch_plan_prefers_dev_script_and_vite_port() {
        let dir = tempfile::tempdir().unwrap();
        write_package(
            dir.path(),
            r#"{
                "scripts": { "dev": "vite", "preview": "vite preview" },
                "devDependencies": { "vite": "^5.0.0" }
            }"#,
        );
        std::fs::write(dir.path().join("pnpm-lock.yaml"), "").unwrap();

        let plan = detect_website_launch_plan(&serde_json::json!({}), dir.path()).unwrap();
        assert_eq!(plan.script, "dev");
        assert_eq!(plan.command, "pnpm run dev");
        assert_eq!(plan.framework_hint, "vite");
        assert_eq!(plan.url, "http://127.0.0.1:5173/");
    }

    #[test]
    fn detect_website_launch_plan_honors_preview_mode() {
        let dir = tempfile::tempdir().unwrap();
        write_package(
            dir.path(),
            r#"{
                "scripts": { "preview": "vite preview" },
                "devDependencies": { "vite": "^5.0.0" }
            }"#,
        );

        let plan =
            detect_website_launch_plan(&serde_json::json!({ "mode": "preview" }), dir.path())
                .unwrap();
        assert_eq!(plan.script, "preview");
        assert_eq!(plan.url, "http://127.0.0.1:4173/");
    }

    #[test]
    fn extract_html_title_and_preview_are_clean() {
        let html = r#"
            <html>
              <head><title>  Demo Site  </title></head>
              <body><h1>Hello</h1><script>ignore()</script><p>Readable preview text.</p></body>
            </html>
        "#;
        assert_eq!(extract_html_title(html).as_deref(), Some("Demo Site"));
        let preview = html_preview_text(html);
        assert!(preview.contains("Hello"));
        assert!(preview.contains("Readable preview text."));
        assert!(!preview.contains("ignore()"));
    }

    #[test]
    fn extract_local_asset_urls_resolves_relative_assets() {
        let html = r#"
            <html>
              <head>
                <link rel="stylesheet" href="/assets/app.css">
                <script src="./main.js"></script>
              </head>
              <body>
                <img src="images/logo.png">
                <a href="https://example.com">external</a>
              </body>
            </html>
        "#;
        let assets = extract_local_asset_urls("http://127.0.0.1:5173/about/", html);
        assert!(assets
            .iter()
            .any(|asset| asset == "http://127.0.0.1:5173/assets/app.css"));
        assert!(assets
            .iter()
            .any(|asset| asset == "http://127.0.0.1:5173/about/main.js"));
        assert!(assets
            .iter()
            .any(|asset| asset == "http://127.0.0.1:5173/about/images/logo.png"));
        assert!(!assets.iter().any(|asset| asset.contains("example.com")));
    }

    #[test]
    fn normalize_route_hints_deduplicates_and_prefixes_slashes() {
        let routes = normalize_route_hints(vec![
            "".to_string(),
            "pricing".to_string(),
            "/pricing".to_string(),
            "/".to_string(),
        ]);
        assert_eq!(routes, vec!["/".to_string(), "/pricing".to_string()]);
    }

    #[tokio::test]
    async fn probe_website_once_reads_local_title() {
        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
        let addr = listener.local_addr().unwrap();
        std::thread::spawn(move || {
            use std::io::{Read, Write};

            if let Ok((mut stream, _)) = listener.accept() {
                // Content-Length is derived from the body so the header can
                // never disagree with the bytes that are actually sent.
                let body = "<html><head><title>Probe Test</title></head><body>hello</body></html>";
                let response = format!(
                    "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
                    body.len(),
                    body
                );
                let mut request = [0_u8; 1024];
                let _ = stream.read(&mut request);
                let _ = stream.write_all(response.as_bytes());
            }
        });

        let client = reqwest::Client::builder()
            .timeout(Duration::from_secs(2))
            .build()
            .unwrap();
        let probe = probe_website_once(&client, &format!("http://{}/", addr))
            .await
            .unwrap();
        assert_eq!(probe.status, 200);
        assert_eq!(probe.title.as_deref(), Some("Probe Test"));
        assert!(probe.body_preview.contains("hello"));
    }
}